rm(list = ls())

library(tidyverse)
library(lubridate)
library(here)
# Load data ---------------------------------------------------------------

# Parliamentary interventions
# Semicolon-delimited text file; the first line is read as data, not as a
# header, and the four columns are named by hand below.
resolutions <- read_delim(
  here("data", "ticino_parliament", "ticino_full.txt"),
  col_names = FALSE, # spelled out: `F` is an ordinary, reassignable variable
  delim = ";"
)
names(resolutions) <- c("title", "author", "type", "date")

# Keep the raw author string, then trim it and collapse internal runs of
# whitespace. str_squish() is idempotent, so one pass is sufficient.
resolutions$author_original <- resolutions$author
resolutions$author <- str_squish(resolutions$author)

# Extract date, year
# Strip the literal "Data: " label and parse what remains as day-month-year.
resolutions$date <- resolutions$date %>%
  str_remove_all("Data: ") %>%
  dmy()
resolutions$year <- year(resolutions$date)


# Names of MPs
mp_names <- readxl::read_excel(here("data", "ticino_parliament", "GC_TI_1987-2019.xlsx"))


# Normalise capitalisation and the punctuation around initials in `author`.

# A dot immediately followed by a word character gets a space inserted after
# it, so that the character after the dot is capitalised by str_to_title().
resolutions$author <- str_replace_all(resolutions$author, "(\\.)(\\w)", "\\1 \\2")

# Title case evens out authors written in all capitals or all lower case.
resolutions$author <- str_to_title(resolutions$author)

# Remove spaces before dots ("Rossi ." -> "Rossi.").
resolutions$author <- str_replace_all(resolutions$author, " \\.", ".")

# Two dotted tokens separated by a single space ("G. P.") are joined ("G.P.").
resolutions$author <- str_replace_all(resolutions$author, "(\\w\\.) (\\w\\.)", "\\1\\2")

# Organize MP names -------------------------------------------------------
# Create columns for first-last name and last-first name.
# paste() is vectorised and already separates its arguments with one space.
# The earlier `collase = " "` was a misspelling of `collapse`, which paste()
# took as a third string to append, leaving two trailing spaces on every name.
# rowwise() is not needed and would leave `mp_names` row-wise grouped.
mp_names <- mp_names %>%
  mutate(
    first_last = paste(Nome, Cognome),
    last_first = paste(Cognome, Nome)
  )


# One row per distinct "last first" name (first occurrence kept), with `n`
# counting how many of those distinct MPs share each surname.
# ungroup() first so duplicated() sees the whole column even if `mp_names`
# is row-wise grouped.
mp_names_unique <- mp_names %>%
  ungroup() %>%
  filter(!duplicated(last_first)) %>%
  group_by(Cognome) %>%
  mutate(n = n()) %>%
  ungroup()

# Regex alternations of surnames: those held by exactly one MP, and those
# shared by several.
unique_last_names <- with(mp_names_unique, paste(Cognome[n == 1], collapse = "|"))
duplicate_last_names <- with(mp_names_unique, paste(Cognome[n > 1], collapse = "|"))

# Dashes in author strings become spaces.
resolutions$author <- str_replace_all(resolutions$author, "-", " ")



# Create col names to be filled out later ---------------------------------------------------
# Surnames shared by more than one distinct "last first" name, one entry per
# MP carrying such a surname.
dup_names <- mp_names %>%
  ungroup() %>%
  filter(!duplicated(last_first)) %>%
  group_by(Cognome) %>%
  filter(n() > 1) %>%
  pull(Cognome)


# Placeholder columns, filled term by term further down.
resolutions$author_unique_compound <- NA
resolutions$author_unique <- NA
resolutions$author_duplicate2 <- NA
resolutions$author_duplicate <- NA



# 1987 parliament ---------------------------------------------------------
first_yr <- 1987
last_yr <- first_yr + 3

# Row positions of the interventions dated within this term, and the MPs
# listed for its first year. ungroup() drops any row-wise grouping `mp_names`
# may carry, so duplicated() and n() below operate on whole columns.
term_rows <- which(resolutions$year %in% first_yr:last_yr)
term_mps <- mp_names %>%
  ungroup() %>%
  filter(Anno == first_yr)

# Try to remove empty white space in middle of text. Be careful: doesn't always work!
resolutions$author[term_rows] <- str_squish(resolutions$author[term_rows])

# START WITH UNIQUE LAST NAMES

## Single-word surnames held by exactly one MP in first_yr
unique_last_names1 <- term_mps %>%
  filter(!str_detect(Cognome, "\\s|-")) %>%
  filter(!duplicated(last_first)) %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  pull(Cognome) %>%
  paste0(collapse = "|")

## Unique names that consist of two words with space between them
unique_last_names2 <- term_mps %>%
  filter(str_detect(Cognome, " ")) %>%
  filter(!duplicated(last_first)) %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  pull(Cognome) %>%
  paste0(collapse = "|")

## Unique names that consist of two words with dash between them
## (dash replaced by space because dashes were removed from `author` above)
unique_last_names3 <- term_mps %>%
  filter(str_detect(Cognome, "-")) %>%
  filter(!duplicated(last_first)) %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  pull(Cognome) %>%
  str_replace_all("-", " ") %>%
  paste0(collapse = "|")

## Combine all, compound names first.
## `collapse` only joins the elements of one vector, so the three pieces are
## put into a vector before joining. Empty pieces are dropped: an empty
## alternative (a pattern such as "|Rossi") matches the empty string at every
## position and would prevent any surname from being extracted.
unique_last_names_final <- paste0(
  Filter(nzchar, c(unique_last_names3, unique_last_names2, unique_last_names1)),
  collapse = "|"
)


## Extract unique authors (one character vector of matches per intervention)
resolutions$author_unique[term_rows] <-
  str_extract_all(resolutions$author[term_rows], unique_last_names_final)

## Double check the names that consist of two words: if only one word was included
compound_surnames <- term_mps %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  ungroup() %>%
  filter(str_detect(Cognome, "\\s|-")) %>%
  pull(Cognome)

double_names_first_part <- compound_surnames %>%
  str_extract("\\w+-|\\w+ ") %>%
  str_remove("-$| $") %>%
  paste0(collapse = "|")

double_names_last_part <- compound_surnames %>%
  str_extract("-\\w+| \\w+") %>%
  str_remove("^-|^ ") %>%
  paste0(collapse = "|")

# DUPLICATE LAST NAMES
# MPs whose surname appears on more than one row for first_yr.
mp_names_dups <- term_mps %>%
  group_by(Cognome) %>%
  mutate(n = n()) %>%
  filter(n > 1)

## Duplicate names
duplicate_names <- paste0(mp_names_dups$Cognome, collapse = "|")
duplicate_names_lf <- paste0(trimws(mp_names_dups$last_first), collapse = "|")
duplicate_names_fl <- paste0(trimws(mp_names_dups$first_last), collapse = "|")

# For this term only the bare shared surnames are extracted.
resolutions$author_duplicate[term_rows] <-
  str_extract_all(resolutions$author[term_rows], duplicate_names)

# 1991 parliament ---------------------------------------------------------
first_yr <- 1991
last_yr <- first_yr + 3

# Row positions of the interventions dated within this term, and the MPs
# listed for its first year. ungroup() drops any row-wise grouping `mp_names`
# may carry, so duplicated() and n() below operate on whole columns.
term_rows <- which(resolutions$year %in% first_yr:last_yr)
term_mps <- mp_names %>%
  ungroup() %>%
  filter(Anno == first_yr)

# Try to remove empty white space in middle of text. Be careful: doesn't always work!
resolutions$author[term_rows] <- str_squish(resolutions$author[term_rows])

# START WITH UNIQUE LAST NAMES

## Single-word surnames held by exactly one MP in first_yr
unique_last_names1 <- term_mps %>%
  filter(!str_detect(Cognome, "\\s|-")) %>%
  filter(!duplicated(last_first)) %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  pull(Cognome) %>%
  paste0(collapse = "|")

## Unique names that consist of two words with space between them
unique_last_names2 <- term_mps %>%
  filter(str_detect(Cognome, " ")) %>%
  filter(!duplicated(last_first)) %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  pull(Cognome) %>%
  paste0(collapse = "|")

## Unique names that consist of two words with dash between them
## (dash replaced by space because dashes were removed from `author` above)
unique_last_names3 <- term_mps %>%
  filter(str_detect(Cognome, "-")) %>%
  filter(!duplicated(last_first)) %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  pull(Cognome) %>%
  str_replace_all("-", " ") %>%
  paste0(collapse = "|")

## Combine all, compound names first.
## `collapse` only joins the elements of one vector, so the three pieces are
## put into a vector before joining. Empty pieces are dropped: an empty
## alternative (a pattern such as "|Rossi") matches the empty string at every
## position and would prevent any surname from being extracted.
unique_last_names_final <- paste0(
  Filter(nzchar, c(unique_last_names3, unique_last_names2, unique_last_names1)),
  collapse = "|"
)


## Extract unique authors (one character vector of matches per intervention)
resolutions$author_unique[term_rows] <-
  str_extract_all(resolutions$author[term_rows], unique_last_names_final)


## Double check the names that consist of two words: if only one word was included
compound_surnames <- term_mps %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  ungroup() %>%
  filter(str_detect(Cognome, "\\s|-")) %>%
  pull(Cognome)

double_names_first_part <- compound_surnames %>%
  str_extract("\\w+-|\\w+ ") %>%
  str_remove("-$| $") %>%
  paste0(collapse = "|")

double_names_last_part <- compound_surnames %>%
  str_extract("-\\w+| \\w+") %>%
  str_remove("^-|^ ") %>%
  paste0(collapse = "|")

# Fix the names in the authors list
# Interventions of this term mentioning one half of a compound surname get the
# full compound surname recorded in `author_unique_compound`.
compound_rows <- which(
  resolutions$year %in% first_yr:last_yr &
    str_detect(resolutions$author, "Masoni|Simoneschi")
)
resolutions$author_unique_compound[compound_rows] <-
  resolutions$author[compound_rows] %>%
  str_replace_all("Masoni", "Masoni Pelloni") %>%
  str_replace_all("Simoneschi", "Simoneschi Cortesi") %>%
  str_extract_all("Masoni Pelloni|Simoneschi Cortesi")


# DUPLICATE LAST NAMES
# MPs whose surname appears on more than one row for first_yr.
mp_names_dups <- term_mps %>%
  group_by(Cognome) %>%
  mutate(n = n()) %>%
  filter(n > 1)


## Duplicate names
duplicate_names <- paste0(mp_names_dups$Cognome, collapse = "|")
duplicate_names_lf <- paste0(trimws(mp_names_dups$last_first), collapse = "|")
duplicate_names_fl <- paste0(trimws(mp_names_dups$first_last), collapse = "|")
duplicate_all <- paste0(duplicate_names_lf, "|", duplicate_names_fl)


# First pass: authors already written as "last first" or "first last".
resolutions$author_duplicate[term_rows] <-
  str_extract_all(resolutions$author[term_rows], duplicate_all)


# Hand-made expansions of abbreviated names into "last first" form. They are
# applied in order, each on the result of the previous one.
resolutions$author[term_rows] <-
  resolutions$author[term_rows] %>%
  str_replace_all("Agustoni C ", "Agustoni Carla") %>%
  str_replace_all("Agustoni C\\. ", "Agustoni Carla") %>%
  str_replace_all("Agustoni E\\. ", "Agustoni Emanuela") %>%
  str_replace_all("Bernasconi Lm ", "Bernasconi Luigi Mattia") %>%
  str_replace_all("Bernasconi L\\.", "Bernasconi Luigi Mattia") %>%
  # NOTE(review): the four "P" variants below are mapped to "Luigi Mattia",
  # including the fully spelled "Pierluigi" - confirm this is intended.
  str_replace_all("Bernasconi Pierluigi", "Bernasconi Luigi Mattia") %>%
  str_replace_all("Bernasconi P\\.", "Bernasconi Luigi Mattia") %>%
  str_replace_all("Bernasconi Pl ", "Bernasconi Luigi Mattia") %>%
  str_replace_all("Bernasconi P ", "Bernasconi Luigi Mattia") %>%
  str_replace_all("Bignasca A ", "Bignasca Attilio") %>% # This could be either Attilio or Antonella but both from Lega and represent Lugano
  # The space belongs inside the optional group (same form as the 1999 term):
  # "Carobbio Guscetti" at the end of a string is now matched too, and the
  # separator before a following word is no longer swallowed.
  str_replace_all("Carobbio Guscetti( Marina)?", "Carobbio Marina") %>%
  # NOTE(review): this also fires on "Carobbio Werner" itself (producing
  # "Carobbio WernerWerner") and consumes the following space - confirm.
  str_replace_all("Carobbio (?!Guscetti)(?!Marina)", "Carobbio Werner") %>%
  str_replace_all("Lepori B\\.", "Lepori Bruno") %>%
  str_replace_all("Lepori B ", "Lepori Bruno") %>%
  str_replace_all("Lepori B$", "Lepori Bruno") %>%
  str_replace_all("Lepori Colombo Francesca", "Lepori Francesca") %>%
  str_replace_all("Pelloni S\\.", "Pelloni Sandro") %>%
  str_replace_all("Pelloni R\\.", "Pelloni Remo") %>%
  str_replace_all("Pelloni R$", "Pelloni Remo") %>%
  str_replace_all("Pelloni R ", "Pelloni Remo") %>%
  str_replace_all("Robbiani D ", "Robbiani Dario") %>%
  str_replace_all("Robbiani D\\.", "Robbiani Dario") %>%
  str_replace_all("Robbiani F\\.", "Robbiani Fiorenzo") %>%
  str_replace_all("Attilio Bignasca", "Bignasca Attilio")

# Second pass, after the expansions: "last first" names only.
resolutions$author_duplicate2[term_rows] <-
  str_extract_all(resolutions$author[term_rows], duplicate_names_lf)

# 1995 parliament ---------------------------------------------------------
first_yr <- 1995
last_yr <- first_yr + 3

# Row positions of the interventions dated within this term, and the MPs
# listed for its first year. ungroup() drops any row-wise grouping `mp_names`
# may carry, so duplicated() and n() below operate on whole columns.
term_rows <- which(resolutions$year %in% first_yr:last_yr)
term_mps <- mp_names %>%
  ungroup() %>%
  filter(Anno == first_yr)

# Try to remove empty white space in middle of text. Be careful: doesn't always work!
resolutions$author[term_rows] <- str_squish(resolutions$author[term_rows])

# START WITH UNIQUE LAST NAMES

## Single-word surnames held by exactly one MP in first_yr
unique_last_names1 <- term_mps %>%
  filter(!str_detect(Cognome, "\\s|-")) %>%
  filter(!duplicated(last_first)) %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  pull(Cognome) %>%
  paste0(collapse = "|")

## Unique names that consist of two words with space between them
unique_last_names2 <- term_mps %>%
  filter(str_detect(Cognome, " ")) %>%
  filter(!duplicated(last_first)) %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  pull(Cognome) %>%
  paste0(collapse = "|")

## Unique names that consist of two words with dash between them
## (dash replaced by space because dashes were removed from `author` above)
unique_last_names3 <- term_mps %>%
  filter(str_detect(Cognome, "-")) %>%
  filter(!duplicated(last_first)) %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  pull(Cognome) %>%
  str_replace_all("-", " ") %>%
  paste0(collapse = "|")

## Combine all, compound names first.
## `collapse` only joins the elements of one vector, so the three pieces are
## put into a vector before joining. Empty pieces are dropped: an empty
## alternative (a pattern such as "|Rossi") matches the empty string at every
## position and would prevent any surname from being extracted.
unique_last_names_final <- paste0(
  Filter(nzchar, c(unique_last_names3, unique_last_names2, unique_last_names1)),
  collapse = "|"
)


## Extract unique authors (one character vector of matches per intervention)
resolutions$author_unique[term_rows] <-
  str_extract_all(resolutions$author[term_rows], unique_last_names_final)


## Double check the names that consist of two words: if only one word was included
compound_surnames <- term_mps %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  ungroup() %>%
  filter(str_detect(Cognome, "\\s|-")) %>%
  pull(Cognome)

double_names_first_part <- compound_surnames %>%
  str_extract("\\w+-|\\w+ ") %>%
  str_remove("-$| $") %>%
  paste0(collapse = "|")

double_names_last_part <- compound_surnames %>%
  str_extract("-\\w+| \\w+") %>%
  str_remove("^-|^ ") %>%
  paste0(collapse = "|")

# Fix the names in the authors list
# Interventions of this term mentioning a short form of a compound surname get
# the full form recorded in `author_unique_compound`.
compound_rows <- which(
  resolutions$year %in% first_yr:last_yr &
    str_detect(resolutions$author, "Sadis Laura|Simoneschi C\\.|Carobbio Marina|Marina Carobbio|Cavalleri")
)
resolutions$author_unique_compound[compound_rows] <-
  resolutions$author[compound_rows] %>%
  str_replace_all("Sadis Laura", "Sadis Riva Laura") %>%
  str_replace_all("Simoneschi C\\.", "Simoneschi Cortesi") %>%
  str_replace_all("Carobbio Marina", "Carobbio Guscetti") %>%
  str_replace_all("Marina Carobbio", "Carobbio Guscetti") %>%
  str_replace_all("Cavalleri", "Allidi-Cavalleri Consuelo") %>%
  str_extract_all("Sadis Riva Laura|Simoneschi Cortesi|Carobbio Guscetti|Allidi-Cavalleri Consuelo")

# DUPLICATE LAST NAMES
# MPs whose surname appears on more than one row for first_yr.
mp_names_dups <- term_mps %>%
  group_by(Cognome) %>%
  mutate(n = n()) %>%
  filter(n > 1)

## Duplicate names
duplicate_names <- paste0(mp_names_dups$Cognome, collapse = "|")
duplicate_names_lf <- paste0(trimws(mp_names_dups$last_first), collapse = "|")
duplicate_names_fl <- paste0(trimws(mp_names_dups$first_last), collapse = "|")
duplicate_all <- paste0(duplicate_names_lf, "|", duplicate_names_fl)

## First, extract authors who have last name first name (or first name last name)
resolutions$author_duplicate[term_rows] <-
  str_extract_all(resolutions$author[term_rows], duplicate_all)

# Hand-made expansions of abbreviated names into "last first" form. They are
# applied in order, each on the result of the previous one.
resolutions$author[term_rows] <-
  resolutions$author[term_rows] %>%
  str_replace_all("Bergonzoli S\\.", "Bergonzoli Silvano") %>%
  str_replace_all("Bergonzoli S ", "Bergonzoli Silvano") %>%
  str_replace_all("Bergonzoli E ", "Bergonzoli Eros") %>%
  str_replace_all("Bergonzoli E\\.", "Bergonzoli Eros") %>%
  str_replace_all("Canonica G ", "Canonica Giorgio") %>%
  str_replace_all("Caninica I\\.", "Canonica Iris") %>% # Notice the spelling mistake
  str_replace_all("Canonica I ", "Canonica Iris") %>%
  str_replace_all("Canonica I\\.", "Canonica Iris") %>%
  str_replace_all("Canonica G\\.", "Canonica Giorgio") %>%
  # The earlier pattern "Carobbio K." had an unescaped dot, so it also matched
  # "Carobbio Ka" and turned "Carobbio Katia" into "Carobbio Katiatia". The
  # character after "K" must now be anything but a lowercase letter, which
  # keeps every other match the old pattern made ("K.", "K ", "K,", ...).
  str_replace_all("Carobbio K[^\\p{Ll}]", "Carobbio Katia") %>%
  str_replace_all("Carobbio K ", "Carobbio Katia") %>%
  str_replace_all("Carobbio W ", "Carobbio Werner") %>%
  str_replace_all("Ferrari F ", "Ferrari Franco") %>%
  str_replace_all("Ferrari M ", "Ferrari Mario") %>%
  str_replace_all("Ferrari M\\.", "Ferrari Mario") %>%
  str_replace_all("Ferrari F\\.", "Ferrari Franco") %>%
  str_replace_all("Foletti M ", "Foletti Michele") %>%
  str_replace_all("Foletti M\\.", "Foletti Michele") %>%
  str_replace_all("Foletti G\\.P\\.", "Foletti Giampaolo") %>%
  str_replace_all("Foletti Gian Paolo", "Foletti Giampaolo") %>%
  str_replace_all("Foletti Gp ", "Foletti Giampaolo")

# Second pass, after the expansions: "last first" names only.
resolutions$author_duplicate2[term_rows] <-
  str_extract_all(resolutions$author[term_rows], duplicate_names_lf)

# 1999 parliament ---------------------------------------------------------
first_yr <- 1999
last_yr <- first_yr + 3

# Row positions of the interventions dated within this term, and the MPs
# listed for its first year. ungroup() drops any row-wise grouping `mp_names`
# may carry, so duplicated() and n() below operate on whole columns.
term_rows <- which(resolutions$year %in% first_yr:last_yr)
term_mps <- mp_names %>%
  ungroup() %>%
  filter(Anno == first_yr)

# Try to remove empty white space in middle of text. Be careful: doesn't always work!
resolutions$author[term_rows] <- str_squish(resolutions$author[term_rows])

# START WITH UNIQUE LAST NAMES

## Single-word surnames held by exactly one MP in first_yr
unique_last_names1 <- term_mps %>%
  filter(!str_detect(Cognome, "\\s|-")) %>%
  filter(!duplicated(last_first)) %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  pull(Cognome) %>%
  paste0(collapse = "|")

## Unique names that consist of two words with space between them
unique_last_names2 <- term_mps %>%
  filter(str_detect(Cognome, " ")) %>%
  filter(!duplicated(last_first)) %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  pull(Cognome) %>%
  paste0(collapse = "|")

## Unique names that consist of two words with dash between them
## (dash replaced by space because dashes were removed from `author` above)
unique_last_names3 <- term_mps %>%
  filter(str_detect(Cognome, "-")) %>%
  filter(!duplicated(last_first)) %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  pull(Cognome) %>%
  str_replace_all("-", " ") %>%
  paste0(collapse = "|")

## Combine all
# Notice: It's important to put the compound names first, otherwise will extract single names from compound names
# `collapse` only joins the elements of one vector, so the three pieces are
# put into a vector before joining. Empty pieces are dropped: an empty
# alternative (a pattern such as "|Rossi") matches the empty string at every
# position and would prevent any surname from being extracted.
unique_last_names_final <- paste0(
  Filter(nzchar, c(unique_last_names2, unique_last_names3, unique_last_names1)),
  collapse = "|"
)


# Rewrite two spellings of the Lepori Colombo name before extraction. Only the
# first occurrence per author string is replaced, and only `author` changes.
resolutions$author[term_rows] <- resolutions$author[term_rows] %>%
  str_replace("Francesca Lepori", "Lepori Colombo") %>%
  str_replace("Lepori Colombo Lo$", "Lepori Lo Colombo")


## Extract unique authors (one character vector of matches per intervention)
resolutions$author_unique[term_rows] <-
  str_extract_all(resolutions$author[term_rows], unique_last_names_final)

## Double check the names that consist of two words: if only one word was included
compound_surnames <- term_mps %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  ungroup() %>%
  filter(str_detect(Cognome, "\\s|-")) %>%
  pull(Cognome)

double_names_first_part <- compound_surnames %>%
  str_extract("\\w+-|\\w+ ") %>%
  str_remove("-$| $") %>%
  paste0(collapse = "|")

double_names_last_part <- compound_surnames %>%
  str_extract("-\\w+| \\w+") %>%
  str_remove("^-|^ ") %>%
  paste0(collapse = "|")

# Fix the names in the authors list
# Interventions of this term mentioning one half of a compound surname get the
# full compound surname recorded in `author_unique_compound`.
compound_rows <- which(
  resolutions$year %in% first_yr:last_yr &
    str_detect(resolutions$author, "Bosia|Cavalleri")
)
resolutions$author_unique_compound[compound_rows] <-
  resolutions$author[compound_rows] %>%
  str_replace_all("Bosia", "Bosia Volkmer") %>%
  str_replace_all("Cavalleri", "Allidi Cavalleri") %>%
  str_extract_all("Bosia Volkmer|Allidi Cavalleri")


# DUPLICATE LAST NAMES
# MPs whose surname appears on more than one row for first_yr.
mp_names_dups <- term_mps %>%
  group_by(Cognome) %>%
  mutate(n = n()) %>%
  filter(n > 1)


## Duplicate names
duplicate_names <- paste0(mp_names_dups$Cognome, collapse = "|")
duplicate_names_lf <- paste0(trimws(mp_names_dups$last_first), collapse = "|")
duplicate_names_fl <- paste0(trimws(mp_names_dups$first_last), collapse = "|")
# "<first word> <next letter>" of each "last first" name, matched either with
# a trailing dot ("Ferrari M.") or a trailing space ("Ferrari M ").
mp_dups_fst_ltr <- str_extract(trimws(mp_names_dups$last_first), "\\w+\\s\\w")
mp_dups_dot <- paste0(mp_dups_fst_ltr, "\\.", collapse = "|")
mp_dups_space <- paste0(mp_dups_fst_ltr, " ", collapse = "|")

duplicate_all <- paste(duplicate_names_lf, duplicate_names_fl, mp_dups_dot, mp_dups_space, sep = "|")

## First, extract authors who have last name first name (or first name last name)
resolutions$author_duplicate[term_rows] <-
  str_extract_all(resolutions$author[term_rows], duplicate_all)



# Hand-made expansions of abbreviated names into "last first" form. They are
# applied in order, each on the result of the previous one.
resolutions$author[term_rows] <-
  resolutions$author[term_rows] %>%
  str_replace_all("Carobbio Guscetti( Marina)?", "Carobbio Marina") %>%
  str_replace_all("Carobbio M\\.", "Carobbio Marina") %>%
  str_replace_all("Carobbio M ", "Carobbio Marina") %>%
  # NOTE(review): this also fires on "Carobbio Werner" itself (producing
  # "Carobbio WernerWerner") and consumes the following space - confirm.
  str_replace_all("Carobbio (?!Guscetti)(?!Marina)", "Carobbio Werner") %>%
  str_replace_all("Ferrari C$", "Ferrari Cleto")

# Second pass, after the expansions: "last first" names only.
resolutions$author_duplicate2[term_rows] <-
  str_extract_all(resolutions$author[term_rows], duplicate_names_lf)

# Remove Ferrari M. here because the name is unclear (There are two Ferrari Ms)
# `author_duplicate` is a list column holding one character vector per
# intervention. The removal is applied inside each vector: passing the list
# straight to stringr coerces every entry to a single deparsed string (e.g.
# 'c("Ferrari M.", "Rossi")') and loses the one-element-per-author structure.
resolutions$author_duplicate[term_rows] <- lapply(
  resolutions$author_duplicate[term_rows],
  function(matches) str_remove_all(matches, "Ferrari M\\.")
)

# 2003 parliament ---------------------------------------------------------
first_yr <- 2003
last_yr <- first_yr + 3

# Row positions of the interventions dated within this term, and the MPs
# listed for its first year. ungroup() drops any row-wise grouping `mp_names`
# may carry, so duplicated() and n() below operate on whole columns.
term_rows <- which(resolutions$year %in% first_yr:last_yr)
term_mps <- mp_names %>%
  ungroup() %>%
  filter(Anno == first_yr)

# Try to remove empty white space in middle of text. Be careful: doesn't always work!
resolutions$author[term_rows] <- str_squish(resolutions$author[term_rows])

# START WITH UNIQUE LAST NAMES

## Single-word surnames held by exactly one MP in first_yr.
## Carobbio is left out because there's also Carobbio Guscetti. It is excluded
## before the names are joined, so it is dropped wherever it falls in the list
## (removing the text "Carobbio|" afterwards misses it in last position).
unique_last_names1 <- term_mps %>%
  filter(!str_detect(Cognome, "\\s|-")) %>%
  filter(!duplicated(last_first)) %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  ungroup() %>%
  filter(Cognome != "Carobbio") %>%
  pull(Cognome) %>%
  paste0(collapse = "|")


## Unique names that consist of two words with space between them
unique_last_names2 <- term_mps %>%
  filter(str_detect(Cognome, " ")) %>%
  filter(!duplicated(last_first)) %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  pull(Cognome) %>%
  paste0(collapse = "|")

## Unique names that consist of two words with dash between them
## (dash replaced by space because dashes were removed from `author` above)
unique_last_names3 <- term_mps %>%
  filter(str_detect(Cognome, "-")) %>%
  filter(!duplicated(last_first)) %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  pull(Cognome) %>%
  str_replace_all("-", " ") %>%
  paste0(collapse = "|")

## Combine all, compound names first.
## `collapse` only joins the elements of one vector, so the three pieces are
## put into a vector before joining. Empty pieces are dropped: an empty
## alternative (a pattern such as "|Rossi") matches the empty string at every
## position and would prevent any surname from being extracted.
unique_last_names_final <- paste0(
  Filter(nzchar, c(unique_last_names3, unique_last_names2, unique_last_names1)),
  collapse = "|"
)


## Extract unique authors
# Split three run-together compound surnames. These apply to every row of
# `resolutions`, not only this term; only the first occurrence per string is
# replaced.
resolutions$author <- resolutions$author %>%
  str_replace("DucaWidmer", "Duca Widmer") %>%
  str_replace("BerettaPiccoli", "Beretta Piccoli") %>%
  str_replace("DeRosa", "De Rosa")

resolutions$author_unique[term_rows] <-
  str_extract_all(resolutions$author[term_rows], unique_last_names_final)

## Double check the names that consist of two words: if only one word was included
compound_surnames <- term_mps %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  ungroup() %>%
  filter(str_detect(Cognome, "\\s|-")) %>%
  pull(Cognome)

double_names_first_part <- compound_surnames %>%
  str_extract("\\w+-|\\w+ ") %>%
  str_remove("-$| $") %>%
  paste0(collapse = "|")

double_names_last_part <- compound_surnames %>%
  str_extract("-\\w+| \\w+") %>%
  str_remove("^-|^ ") %>%
  paste0(collapse = "|")

# Fix the names in the authors list
# Interventions of this term mentioning Carobbio: "Carobbio M." is expanded and
# the full compound name recorded in `author_unique_compound`.
compound_rows <- which(
  resolutions$year %in% first_yr:last_yr &
    str_detect(resolutions$author, "Carobbio")
)
resolutions$author_unique_compound[compound_rows] <-
  resolutions$author[compound_rows] %>%
  str_replace_all("Carobbio M\\.", "Carobbio Guscetti Marina") %>%
  str_extract_all("Carobbio Guscetti Marina")

# DUPLICATE LAST NAMES
# MPs whose surname appears on more than one row for first_yr ...
mp_names_dups <- term_mps %>%
  group_by(Cognome) %>%
  mutate(n = n()) %>%
  filter(n > 1)
# ... plus every first_yr MP whose surname contains "Carobbio".
mp_names_dups <- mp_names_dups %>%
  bind_rows(term_mps %>% filter(str_detect(Cognome, "Carobbio"))) # Add Carobbio to last names

## Duplicate names
duplicate_names <- paste0(mp_names_dups$Cognome, collapse = "|")
duplicate_names_lf <- paste0(trimws(mp_names_dups$last_first), collapse = "|")
duplicate_names_fl <- paste0(trimws(mp_names_dups$first_last), collapse = "|")
# "<first word> <next letter>" of each "last first" name, matched either with
# a trailing dot or a trailing space.
mp_dups_fst_ltr <- str_extract(trimws(mp_names_dups$last_first), "\\w+\\s\\w")
mp_dups_dot <- paste0(mp_dups_fst_ltr, "\\.", collapse = "|")
mp_dups_space <- paste0(mp_dups_fst_ltr, " ", collapse = "|")

duplicate_all <- paste(duplicate_names_lf, duplicate_names_fl, mp_dups_dot, mp_dups_space, sep = "|")

## First, extract authors who have last name first name (or first name last name)
resolutions$author_duplicate[term_rows] <-
  str_extract_all(resolutions$author[term_rows], duplicate_all)


# The two Beretta Piccoli are added by hand, for the second pass only.
duplicate_names_lf <- paste0(duplicate_names_lf, "|", "Beretta Piccoli Luca", "|", "Beretta Piccoli Fausto")

# Hand-made expansions of abbreviated names into "last first" form. They are
# applied in order, each on the result of the previous one.
resolutions$author[term_rows] <-
  resolutions$author[term_rows] %>%
  str_replace_all("Carobbio M\\.", "Carobbio Marina") %>%
  str_replace_all("Beretta Piccoli F\\.", "Beretta Piccoli Fausto") %>%
  str_replace_all("Fausto Beretta Piccoli", "Beretta Piccoli Fausto") %>%
  str_replace_all("Beretta Piccoli L\\.", "Beretta Piccoli Luca") %>%
  str_replace_all("F\\. Beretta Piccoli", "Beretta Piccoli Fausto") %>%
  str_replace_all("D\\. Ghisletta", "Ghisletta Dario") %>%
  str_replace_all("R\\. Ghisletta", "Ghisletta Raoul")

# Second pass, after the expansions: "last first" names only.
resolutions$author_duplicate2[term_rows] <-
  str_extract_all(resolutions$author[term_rows], duplicate_names_lf)

# Strip "Beretta Piccoli" from the unique-surname matches of this term.
# `author_unique` is a list column holding one character vector per
# intervention. The removal is applied inside each vector: passing the list
# straight to stringr coerces every entry to a single deparsed string (e.g.
# "character(0)" or 'c("Rossi", "Bianchi")').
resolutions$author_unique[term_rows] <- lapply(
  resolutions$author_unique[term_rows],
  function(matches) str_remove_all(matches, "Beretta Piccoli")
)


# 2007 parliament ---------------------------------------------------------
first_yr <- 2007
last_yr <- first_yr + 3

# Row positions of the interventions dated within this term, and the MPs
# listed for its first year. ungroup() drops any row-wise grouping `mp_names`
# may carry, so duplicated() and n() below operate on whole columns.
term_rows <- which(resolutions$year %in% first_yr:last_yr)
term_mps <- mp_names %>%
  ungroup() %>%
  filter(Anno == first_yr)


# Try to remove empty white space in middle of text. Be careful: doesn't always work!
resolutions$author[term_rows] <- str_squish(resolutions$author[term_rows])

# START WITH UNIQUE LAST NAMES

## Single-word surnames held by exactly one MP in first_yr
unique_last_names1 <- term_mps %>%
  filter(!str_detect(Cognome, "\\s|-")) %>%
  filter(!duplicated(last_first)) %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  pull(Cognome) %>%
  paste0(collapse = "|")

## Unique names that consist of two words with space between them
unique_last_names2 <- term_mps %>%
  filter(str_detect(Cognome, " ")) %>%
  filter(!duplicated(last_first)) %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  pull(Cognome) %>%
  paste0(collapse = "|")

## Unique names that consist of two words with dash between them
## (dash replaced by space because dashes were removed from `author` above)
unique_last_names3 <- term_mps %>%
  filter(str_detect(Cognome, "-")) %>%
  filter(!duplicated(last_first)) %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  pull(Cognome) %>%
  str_replace_all("-", " ") %>%
  paste0(collapse = "|")

## Combine all, compound names first.
## `collapse` only joins the elements of one vector, so the three pieces are
## put into a vector before joining. Empty pieces are dropped: an empty
## alternative (a pattern such as "|Rossi") matches the empty string at every
## position and would prevent any surname from being extracted.
unique_last_names_final <- paste0(
  Filter(nzchar, c(unique_last_names3, unique_last_names2, unique_last_names1)),
  collapse = "|"
)


## Extract unique authors (one character vector of matches per intervention)
resolutions$author_unique[term_rows] <-
  str_extract_all(resolutions$author[term_rows], unique_last_names_final)

## Double check the names that consist of two words: if only one word was included
compound_surnames <- term_mps %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  ungroup() %>%
  filter(str_detect(Cognome, "\\s|-")) %>%
  pull(Cognome)

double_names_first_part <- compound_surnames %>%
  str_extract("\\w+-|\\w+ ") %>%
  str_remove("-$| $") %>%
  paste0(collapse = "|")

double_names_last_part <- compound_surnames %>%
  str_extract("-\\w+| \\w+") %>%
  str_remove("^-|^ ") %>%
  paste0(collapse = "|")

# No names exist in the duplicate names

# Fix the names in the authors list
# (no compound-surname fixes are applied for this term)


# DUPLICATE LAST NAMES
# MPs whose surname appears on more than one row for first_yr, sorted by surname.
mp_names_dups <- term_mps %>%
  group_by(Cognome) %>%
  mutate(n = n()) %>%
  filter(n > 1) %>%
  ungroup() %>%
  arrange(Cognome)


## Duplicate names
duplicate_names <- paste0(mp_names_dups$Cognome, collapse = "|")
duplicate_names_lf <- paste0(trimws(mp_names_dups$last_first), collapse = "|")
duplicate_names_fl <- paste0(trimws(mp_names_dups$first_last), collapse = "|")
# "<first word> <next letter>" of each "last first" name, matched either with
# a trailing dot or a trailing space.
mp_dups_fst_ltr <- str_extract(trimws(mp_names_dups$last_first), "\\w+\\s\\w")
mp_dups_dot <- paste0(mp_dups_fst_ltr, "\\.", collapse = "|")
mp_dups_space <- paste0(mp_dups_fst_ltr, " ", collapse = "|")

duplicate_all <- paste(duplicate_names_lf, duplicate_names_fl, mp_dups_dot, mp_dups_space, sep = "|")

## First, extract authors who have last name first name (or first name last name)
resolutions$author_duplicate[term_rows] <-
  str_extract_all(resolutions$author[term_rows], duplicate_all)


# Hand-made expansions of abbreviated names into "last first" form.
# NOTE(review): `author` was title-cased earlier in the script, so the
# lowercase-initial patterns "raoul Ghisletta" and "attilio Bignasca" can only
# match where the first name is glued to a preceding character. Confirm
# whether "Raoul Ghisletta" / "Attilio Bignasca" were meant as well.
resolutions$author[term_rows] <-
  resolutions$author[term_rows] %>%
  str_replace_all("Ghisletta D$", "Ghisletta Dario") %>%
  str_replace_all("raoul Ghisletta", "Ghisletta Raoul") %>%
  str_replace_all("attilio Bignasca", "Bignasca Attilio")

# Second pass, after the expansions: "last first" names only.
resolutions$author_duplicate2[term_rows] <-
  str_extract_all(resolutions$author[term_rows], duplicate_names_lf)


# 2011 parliament ---------------------------------------------------------
# Match the authors of resolutions dated 2011-2014 against the roster of MPs
# whose `Anno` is 2011.
first_yr <- 2011
last_yr <- first_yr + 3

# Roster for this legislature and positions of its resolutions. `year` is not
# modified in this section, so the row index is computed once and reused.
term_mps <- mp_names %>% filter(Anno == first_yr)
term_rows <- which(resolutions$year %in% first_yr:last_yr)


#Try to remove empty white space in the middle of the text. Be careful: doesn't always work!
resolutions$author[term_rows] <- str_squish(resolutions$author[term_rows])

# START WITH UNIQUE LAST NAMES

##Single-word surnames carried by exactly one MP in first_yr
unique_last_names1 <- term_mps %>%
  filter(!str_detect(Cognome, "\\s|-")) %>%
  group_by(last_first) %>%
  filter(row_number() == 1) %>% # keep one roster row per last_first value
  ungroup() %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  pull(Cognome) %>%
  paste0(collapse = "|")


##Unique surnames made of two words separated by a space
unique_last_names2 <- term_mps %>%
  filter(str_detect(Cognome, " ")) %>%
  group_by(last_first) %>%
  filter(row_number() == 1) %>%
  ungroup() %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  pull(Cognome) %>%
  paste0(collapse = "|")

##Unique surnames made of two words separated by a dash. The dash becomes a
##space because (per the original note) dashes were removed from the author
##strings earlier in the script.
unique_last_names3 <- term_mps %>%
  filter(str_detect(Cognome, "-")) %>%
  group_by(last_first) %>%
  filter(row_number() == 1) %>%
  ungroup() %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  pull(Cognome) %>%
  str_replace_all("-", " ") %>%
  paste0(collapse = "|")

##Combine all into one alternation, compound surnames listed first.
# `collapse` only joins the elements of a single vector; three separate
# strings have to be joined with `sep`.
# NOTE(review): if one of the three pieces is "" the combined pattern gains an
# empty alternative - confirm every legislature has each kind of surname.
unique_last_names_final <- paste(
  unique_last_names3, unique_last_names2, unique_last_names1,
  sep = "|"
)


##Extract unique authors (one character vector of matches per resolution)
resolutions$author_unique[term_rows] <- str_extract_all(
  resolutions$author[term_rows],
  unique_last_names_final
)

##Double check the names that consist of two words: if only one word was included
# Compound surnames whose Cognome occurs once in the roster.
compound_unique <- term_mps %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  filter(str_detect(Cognome, "\\s|-")) %>%
  pull(Cognome)

double_names_first_part <- compound_unique %>%
  str_extract("\\w+-|\\w+ ") %>%
  str_remove("-$| $") %>%
  paste0(collapse = "|")

double_names_last_part <- compound_unique %>%
  str_extract("-\\w+| \\w+") %>%
  str_remove("^-|^ ") %>%
  paste0(collapse = "|")

# Delete the literal "Bacchetta Elena" before the Bacchetta rule below runs.
# NOTE(review): presumably so she is not relabelled as Bacchetta Cattori - confirm.
resolutions$author[term_rows] <- str_remove_all(
  resolutions$author[term_rows],
  "Bacchetta Elena"
)

#Fix the names in the authors list
# Resolutions of this legislature that cite only one half of a compound
# surname: expand it to the full "Last Last First" label and store that label.
compound_rows <- which(
  resolutions$year %in% first_yr:last_yr &
    str_detect(resolutions$author, "Bacchetta|Crivelli")
)
resolutions$author_unique_compound[compound_rows] <-
  resolutions$author[compound_rows] %>%
  str_replace_all("Bacchetta", "Bacchetta Cattori Fabio") %>%
  str_replace_all("Crivelli", "Crivelli Barella Claudia") %>%
  str_extract_all("Bacchetta Cattori Fabio|Crivelli Barella Claudia")

#Keep checking the compound names: double names last part

#DUPLICATE LAST NAMES
# MPs whose surname is shared with at least one other MP of the same roster.
mp_names_dups <- term_mps %>%
  group_by(Cognome) %>%
  mutate(n = n()) %>%
  filter(n > 1) %>%
  ungroup() %>%
  arrange(Cognome)


##Duplicate names
# Alternations for every spelling under which such an MP may be cited:
# "Last First", "First Last", "Last F." and "Last F ".
dup_last_first <- mp_names_dups %>% pull(last_first) %>% trimws()
dup_first_last <- mp_names_dups %>% pull(first_last) %>% trimws()

duplicate_names <- paste0(mp_names_dups$Cognome, collapse = "|")
duplicate_names_lf <- paste0(dup_last_first, collapse = "|")
duplicate_names_fl <- paste0(dup_first_last, collapse = "|")
mp_dups_fst_ltr <- str_extract(dup_last_first, "\\w+\\s\\w")
mp_dups_dot <- paste0(mp_dups_fst_ltr, "\\.") %>% paste0(collapse = "|")
mp_dups_space <- paste0(mp_dups_fst_ltr, " ") %>% paste0(collapse = "|")

duplicate_all <- paste(
  duplicate_names_lf, duplicate_names_fl, mp_dups_dot, mp_dups_space,
  sep = "|"
)

##First, extract authors who have last name first name (or first name last name)
resolutions$author_duplicate[term_rows] <- str_extract_all(
  resolutions$author[term_rows],
  duplicate_all
)

#Note Savoia Michelle and Savoia Sergio are both from Bellinzona and represent Verdi party

#Note both Savoias come from same party and same municipality so can sub both with the same one
resolutions$author[term_rows] <- str_replace_all(
  resolutions$author[term_rows],
  "Savoia", "Savoia Michelle"
)

# Second pass on the corrected strings, "Last First" spelling only.
resolutions$author_duplicate2[term_rows] <- str_extract_all(
  resolutions$author[term_rows],
  duplicate_names_lf
)

#Lurati is unidentified here because it's a duplicate name but the parliament resolutions data doesn't identify him



# 2015 parliament ---------------------------------------------------------
# Match the authors of resolutions dated 2015-2018 against the roster of MPs
# whose `Anno` is 2015.
first_yr <- 2015
last_yr <- first_yr + 3

# Roster for this legislature and positions of its resolutions. `year` is not
# modified in this section, so the row index is computed once and reused.
term_mps <- mp_names %>% filter(Anno == first_yr)
term_rows <- which(resolutions$year %in% first_yr:last_yr)


#Try to remove empty white space in the middle of the text. Be careful: doesn't always work!
resolutions$author[term_rows] <- str_squish(resolutions$author[term_rows])

# START WITH UNIQUE LAST NAMES

##Single-word surnames carried by exactly one MP in first_yr
unique_last_names1 <- term_mps %>%
  filter(!str_detect(Cognome, "\\s|-")) %>%
  group_by(last_first) %>%
  filter(row_number() == 1) %>% # keep one roster row per last_first value
  ungroup() %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  pull(Cognome) %>%
  paste0(collapse = "|")

##Unique surnames made of two words separated by a space
unique_last_names2 <- term_mps %>%
  filter(str_detect(Cognome, " ")) %>%
  group_by(last_first) %>%
  filter(row_number() == 1) %>%
  ungroup() %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  pull(Cognome) %>%
  paste0(collapse = "|")
#This name has an accent: also accept the spelling without it
unique_last_names2 <- paste0(unique_last_names2, "|Delco Petralli")


##Unique surnames made of two words separated by a dash. The dash becomes a
##space because (per the original note) dashes were removed from the author
##strings earlier in the script.
unique_last_names3 <- term_mps %>%
  filter(str_detect(Cognome, "-")) %>%
  group_by(last_first) %>%
  filter(row_number() == 1) %>%
  ungroup() %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  pull(Cognome) %>%
  str_replace_all("-", " ") %>%
  paste0(collapse = "|")

##Combine all into one alternation, compound surnames listed first.
# `collapse` only joins the elements of a single vector; three separate
# strings have to be joined with `sep`.
# NOTE(review): if one of the three pieces is "" the combined pattern gains an
# empty alternative - confirm every legislature has each kind of surname.
unique_last_names_final <- paste(
  unique_last_names3, unique_last_names2, unique_last_names1,
  sep = "|"
)


##Extract unique authors (one character vector of matches per resolution)
resolutions$author_unique[term_rows] <- str_extract_all(
  resolutions$author[term_rows],
  unique_last_names_final
)


##Double check the names that consist of two words: if only one word was included
# Compound surnames whose Cognome occurs once in the roster.
compound_unique <- term_mps %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  filter(str_detect(Cognome, "\\s|-")) %>%
  pull(Cognome)

double_names_first_part <- compound_unique %>%
  str_extract("\\w+-|\\w+ ") %>%
  str_remove("-$| $") %>%
  paste0(collapse = "|")

double_names_last_part <- compound_unique %>%
  str_extract("-\\w+| \\w+") %>%
  str_remove("^-|^ ") %>%
  paste0(collapse = "|")

#Fix the names in the authors list
# Resolutions of this legislature that cite a compound surname partially or in
# a variant spelling: rewrite the citation to the full "Last Last First" label
# and store that label.
compound_rows <- which(
  resolutions$year %in% first_yr:last_yr &
    str_detect(resolutions$author, "Ferrara|Petralli|Lurati|Grassi")
)
resolutions$author_unique_compound[compound_rows] <-
  resolutions$author[compound_rows] %>%
  str_replace_all("Ferrara", "Ferrara Micocci Natalia") %>%
  # The pattern used to read "Declò Petralli", a transposition of "Delcò" that
  # cannot match the spelling produced by the replacement. Accept the correct
  # surname with or without the accent, keep the old misspelling, and swallow
  # an already present first name so it is not duplicated.
  str_replace_all(
    "(?:Declò|Delc[oò]) Petralli(?: Michela)?",
    "Delcò Petralli Michela"
  ) %>%
  # The last alternative used to start with a stray space (" Grassi T "),
  # unlike every other alternative, so it missed a citation at the start of
  # the string.
  str_replace_all(
    "Lurati Grassi|Lurati Tatiano|Grassi Tatiano|Grassi Lurati|Lurati G\\.|Lurati T\\.|Grassi T\\.|Lurati G |Lurati T |Grassi T ",
    "Lurati Grassi Tatiana"
  ) %>%
  str_extract_all("Ferrara Micocci Natalia|Delcò Petralli Michela|Lurati Grassi Tatiana")

#DUPLICATE LAST NAMES
# MPs whose surname is shared with at least one other MP of the same roster.
mp_names_dups <- term_mps %>%
  group_by(Cognome) %>%
  mutate(n = n()) %>%
  filter(n > 1) %>%
  ungroup() %>%
  arrange(Cognome)

##Duplicate names
# Alternations for every spelling under which such an MP may be cited:
# "Last First", "First Last", "Last F." and "Last F ".
dup_last_first <- mp_names_dups %>% pull(last_first) %>% trimws()
dup_first_last <- mp_names_dups %>% pull(first_last) %>% trimws()

duplicate_names <- paste0(mp_names_dups$Cognome, collapse = "|")
duplicate_names_lf <- paste0(dup_last_first, collapse = "|")
duplicate_names_fl <- paste0(dup_first_last, collapse = "|")
mp_dups_fst_ltr <- str_extract(dup_last_first, "\\w+\\s\\w")
mp_dups_dot <- paste0(mp_dups_fst_ltr, "\\.") %>% paste0(collapse = "|")
mp_dups_space <- paste0(mp_dups_fst_ltr, " ") %>% paste0(collapse = "|")

duplicate_all <- paste(
  duplicate_names_lf, duplicate_names_fl, mp_dups_dot, mp_dups_space,
  sep = "|"
)


##First, extract authors who have last name first name (or first name last name)
resolutions$author_duplicate[term_rows] <- str_extract_all(
  resolutions$author[term_rows],
  duplicate_all
)



# 2019 parliament ---------------------------------------------------------
# Match the authors of resolutions dated 2019-2022 against the roster of MPs
# whose `Anno` is 2019.
first_yr <- 2019
last_yr <- first_yr + 3

# Roster for this legislature and positions of its resolutions. `year` is not
# modified in this section, so the row index is computed once and reused.
term_mps <- mp_names %>% filter(Anno == first_yr)
term_rows <- which(resolutions$year %in% first_yr:last_yr)


#Try to remove empty white space in the middle of the text. Be careful: doesn't always work!
resolutions$author[term_rows] <- str_squish(resolutions$author[term_rows])

# START WITH UNIQUE LAST NAMES

##Single-word surnames carried by exactly one MP in first_yr
unique_last_names1 <- term_mps %>%
  filter(!str_detect(Cognome, "\\s|-")) %>%
  group_by(last_first) %>%
  filter(row_number() == 1) %>% # keep one roster row per last_first value
  ungroup() %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  pull(Cognome) %>%
  paste0(collapse = "|")


##Unique surnames made of two words separated by a space
unique_last_names2 <- term_mps %>%
  filter(str_detect(Cognome, " ")) %>%
  group_by(last_first) %>%
  filter(row_number() == 1) %>%
  ungroup() %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  pull(Cognome) %>%
  paste0(collapse = "|")


##Unique surnames made of two words separated by a dash. The dash becomes a
##space because (per the original note) dashes were removed from the author
##strings earlier in the script.
unique_last_names3 <- term_mps %>%
  filter(str_detect(Cognome, "-")) %>%
  group_by(last_first) %>%
  filter(row_number() == 1) %>%
  ungroup() %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  pull(Cognome) %>%
  str_replace_all("-", " ") %>%
  paste0(collapse = "|")

##Combine all into one alternation, compound surnames listed first.
# `collapse` only joins the elements of a single vector; three separate
# strings have to be joined with `sep`.
# NOTE(review): if one of the three pieces is "" the combined pattern gains an
# empty alternative - confirm every legislature has each kind of surname.
unique_last_names_final <- paste(
  unique_last_names3, unique_last_names2, unique_last_names1,
  sep = "|"
)


##Extract unique authors (one character vector of matches per resolution)
resolutions$author_unique[term_rows] <- str_extract_all(
  resolutions$author[term_rows],
  unique_last_names_final
)

##Double check the names that consist of two words: if only one word was included
# Compound surnames whose Cognome occurs once in the roster.
compound_unique <- term_mps %>%
  group_by(Cognome) %>%
  filter(n() == 1) %>%
  filter(str_detect(Cognome, "\\s|-")) %>%
  pull(Cognome)

double_names_first_part <- compound_unique %>%
  str_extract("\\w+-|\\w+ ") %>%
  str_remove("-$| $") %>%
  paste0(collapse = "|")

double_names_last_part <- compound_unique %>%
  str_extract("-\\w+| \\w+") %>%
  str_remove("^-|^ ") %>%
  paste0(collapse = "|")

#Fix the names in the authors list
# Resolutions of this legislature that cite a compound surname misspelled,
# reversed or partially: rewrite the citation to the full "Last Last First"
# label and store that label.
compound_rows <- which(
  resolutions$year %in% first_yr:last_yr &
    str_detect(resolutions$author, "Crivelli Barelli Claudia|Barella Crivelli|Lurati")
)
resolutions$author_unique_compound[compound_rows] <-
  resolutions$author[compound_rows] %>%
  str_replace_all("Crivelli Barelli Claudia", "Crivelli Barella Claudia") %>%
  str_replace_all("Barella Crivelli", "Crivelli Barella Claudia") %>%
  str_replace_all("Lurati", "Lurati Grassi Tatiana") %>%
  str_extract_all("Crivelli Barella Claudia|Lurati Grassi Tatiana")


#DUPLICATE LAST NAMES
# MPs whose surname is shared with at least one other MP of the same roster.
mp_names_dups <- term_mps %>%
  group_by(Cognome) %>%
  mutate(n = n()) %>%
  filter(n > 1) %>%
  ungroup() %>%
  arrange(Cognome)

##Duplicate names
dup_last_first <- mp_names_dups %>% pull(last_first) %>% trimws()
dup_first_last <- mp_names_dups %>% pull(first_last) %>% trimws()

duplicate_names <- paste0(mp_names_dups$Cognome, collapse = "|")
duplicate_names_lf <- paste0(dup_last_first, collapse = "|")
duplicate_names_fl <- paste0(dup_first_last, collapse = "|")
# The initial-based patterns are still built but deliberately left out of
# duplicate_all below.
mp_dups_fst_ltr <- str_extract(dup_last_first, "\\w+\\s\\w")
mp_dups_dot <- paste0(mp_dups_fst_ltr, "\\.") %>% paste0(collapse = "|")
mp_dups_space <- paste0(mp_dups_fst_ltr, " ") %>% paste0(collapse = "|")

# Cannot include dots/spaces (mp_dups_dot, mp_dups_space) because the
# Gianella's share the same first name initial
duplicate_all <- paste(duplicate_names_lf, duplicate_names_fl, sep = "|")

##First, extract authors who have last name first name (or first name last name)
resolutions$author_duplicate[term_rows] <- str_extract_all(
  resolutions$author[term_rows],
  duplicate_all
)

##Next: check ones where last name has been detected but has not been included in the author_duplicate column
#Note both Bignascas come from same party and same municipality so can sub both with the same one
resolutions$author[term_rows] <- str_replace_all(
  resolutions$author[term_rows],
  "Bignasca", "Bignasca Attilio"
)

# Second pass on the corrected strings, "Last First" spelling only.
resolutions$author_duplicate2[term_rows] <- str_extract_all(
  resolutions$author[term_rows],
  duplicate_names_lf
)

#Lurati is unidentified here because it's a duplicate name but the parliament resolutions data doesn't identify him


# Clean 1987 --------------------------------------------------------
# The author_* columns were filled with str_extract_all() results, i.e. one
# character vector per row. paste() turns each element into its R source form
# (for example c("A", "B"), character(0) or NA); the removals below strip that
# syntax so that only the names, separated by ", ", remain.
flatten_matches <- function(matches) {
  as_text <- paste(matches)
  as_text <- str_remove_all(as_text, '\\"')
  as_text <- str_remove_all(as_text, "character\\(0\\)")
  as_text <- str_remove_all(as_text, "NA")
  as_text <- str_remove_all(as_text, "c\\(")
  str_remove_all(as_text, "\\)")
}

resolutions$author_unique <- flatten_matches(resolutions$author_unique)
resolutions$author_unique_compound <- flatten_matches(resolutions$author_unique_compound)
resolutions$author_duplicate <- flatten_matches(resolutions$author_duplicate)
resolutions$author_duplicate2 <- flatten_matches(resolutions$author_duplicate2)

# Keep only resolutions dated 1987 or later.
resolutions <- filter(resolutions, year >= 1987)

#1987
# Build one row per (resolution, author) for the resolutions dated up to 1990
# and attach the 1987 roster entry of each author.
resolutions87 <- resolutions %>% filter(year <= 1990)

##List of unique mp names
# MPs of the 1987 roster whose surname occurs once; dashes become spaces.
mps_87_unique <- mp_names %>%
  filter(Anno == 1987) %>%
  group_by(Cognome) %>%
  mutate(n = n()) %>%
  filter(n == 1) %>%
  ungroup() %>%
  select(-n)
mps_87_unique$Cognome <- mps_87_unique$Cognome %>% str_replace_all("-", " ")

#Find people who are kind of duplicates (compound name that includes component of single name)
#Make sure Lepori Bonetti comes before Lepori: the surnames are collapsed into
#a regex alternation below, which is tried left to right.
mps_87_unique <- bind_rows(
  mps_87_unique %>% filter(Cognome == "Lepori Bonetti"),
  mps_87_unique %>% filter(Cognome == "Lepori"),
  mps_87_unique %>% filter(!Cognome %in% c("Lepori Bonetti", "Lepori"))
)


##list of duplicated mp names
mps_87_dups <- mp_names %>%
  filter(Anno == 1987) %>%
  group_by(Cognome) %>%
  mutate(n = n()) %>%
  filter(n > 1) %>%
  ungroup() %>%
  select(-n)
mps_87_dups$Cognome <- mps_87_dups$Cognome %>% str_replace_all("-", " ")

##Final list of MP names
#Create MP names that match the final one: the surname alone when it is
#unique, "Surname Firstname" when it is shared.
mps_87_final <- bind_rows(
  mps_87_unique %>% mutate(author_final = Cognome),
  mps_87_dups %>% mutate(author_final = paste(Cognome, Nome))
)

mps_87_final <- mps_87_final %>% select(-c(first_last, last_first, Anno))

##Extract all unique names
# str_extract_all() returns a list; paste() plus the removals flatten it back
# to a ", "-separated string.
resolutions87$author_final <- resolutions87$author_unique %>%
  str_extract_all(paste0(mps_87_unique$Cognome, collapse = "|")) %>%
  paste() %>%
  str_remove_all('\\"') %>%
  str_remove_all("character\\(0\\)") %>%
  str_remove_all("NA") %>%
  str_remove_all("c\\(") %>%
  str_remove_all("\\)")

#Duplicate names
# NOTE(review): every "Noseda" becomes "Noseda John", including one already
# followed by a first name or initial - confirm against the extracted values.
resolutions87$author_duplicate <- resolutions87$author_duplicate %>%
  paste() %>%
  str_replace_all("Noseda", "Noseda John")

# Append the duplicate-surname authors to the unique ones. The previous
# paste0(author_final, author_duplicate, collapse = ", ") joined the two
# strings without any separator (`collapse` only acts between the elements of
# one vector), fusing names such as "Lepori" and "Noseda John" into a single
# unmatched author. Use an explicit ", " and strip a leading or trailing comma
# left by an empty side, as the later legislature sections do.
resolutions87 <- resolutions87 %>%
  mutate(
    author_final = paste0(author_final, ", ", author_duplicate) %>%
      trimws() %>%
      str_remove_all(",$") %>%
      str_remove_all("^,")
  )

#Create ID variable (seq_len() stays empty when there are no rows)
resolutions87$id <- paste0(1987, "-", seq_len(nrow(resolutions87)))

#separate into a row for each author
resolutions87 <- resolutions87 %>% separate_rows(author_final, sep = ",")
resolutions87$author_final <- resolutions87$author_final %>% trimws()

#Remove same author who appears twice for the same resolution
# (the result stays grouped by id and author_final, as before)
resolutions87 <- resolutions87 %>%
  group_by(id, author_final) %>%
  filter(row_number() == 1)

#Remove unnecessary columns
resolutions87 <- resolutions87 %>%
  select(-c(author_unique, author_unique_compound, author_duplicate, author_duplicate2))

#Merge (joins on the columns the two tables have in common)
resolutions87 <- resolutions87 %>% left_join(mps_87_final)

# Clean 1991 --------------------------------------------------------
# Build one row per (resolution, author) for the resolutions dated 1991-1994
# and attach the 1991 roster entry of each author.
resolutions91 <- resolutions %>% filter(year >= 1991, year <= 1994)

##List of unique mp names
# MPs of the 1991 roster whose surname occurs once; dashes become spaces.
mps_91_unique <- mp_names %>%
  filter(Anno == 1991) %>%
  group_by(Cognome) %>%
  mutate(n = n()) %>%
  filter(n == 1) %>%
  ungroup() %>%
  select(-n)
mps_91_unique$Cognome <- mps_91_unique$Cognome %>% str_replace_all("-", " ")

# Put the compound surname Vago Pedrazzini before plain Pedrazzini: the
# surnames are collapsed into a regex alternation below, which is tried left
# to right.
mps_91_unique <- bind_rows(
  mps_91_unique %>% filter(Cognome == "Vago Pedrazzini"),
  mps_91_unique %>% filter(Cognome == "Pedrazzini"),
  mps_91_unique %>% filter(!Cognome %in% c("Vago Pedrazzini", "Pedrazzini"))
)

##list of duplicated mp names
mps_91_dups <- mp_names %>%
  filter(Anno == 1991) %>%
  group_by(Cognome) %>%
  mutate(n = n()) %>%
  filter(n > 1) %>%
  ungroup() %>%
  select(-n)
mps_91_dups$Cognome <- mps_91_dups$Cognome %>% str_replace_all("-", " ")

##Final list of MP names
#Create MP names that match the final one: the surname alone when it is
#unique, "Surname Firstname" when it is shared.
mps_91_final <- bind_rows(
  mps_91_unique %>% mutate(author_final = Cognome),
  mps_91_dups %>% mutate(author_final = paste(Cognome, Nome))
)

mps_91_final <- mps_91_final %>% select(-c(first_last, last_first, Anno))

##Extract all unique names
# paste0() is vectorised, so no rowwise() is needed to combine two columns.
resolutions91 <- resolutions91 %>%
  mutate(author_unique = paste0(author_unique, ", ", author_unique_compound))

# str_extract_all() returns a list; paste() plus the removals flatten it back
# to a ", "-separated string.
resolutions91$author_final <- resolutions91$author_unique %>%
  str_extract_all(paste0(mps_91_unique$Cognome, collapse = "|")) %>%
  paste() %>%
  str_remove_all('\\"') %>%
  str_remove_all("character\\(0\\)") %>%
  str_remove_all("NA") %>%
  str_remove_all("c\\(") %>%
  str_remove_all("\\)")

##Make sure we didn't miss anyone
# Edit distance between what was extracted and what was available, row by
# row. Computed element-wise: adist(x, y) on the whole columns builds an
# n x n matrix only to read its diagonal.
resolutions91 <- resolutions91 %>%
  mutate(author_check = author_unique %>% trimws() %>% str_replace_all(",$", ""))
resolutions91$adist <- vapply(
  seq_len(nrow(resolutions91)),
  function(i) {
    adist(resolutions91$author_final[i], resolutions91$author_check[i])[1, 1]
  },
  numeric(1)
)

#Duplicate names
# Merge both duplicate-surname passes and strip the comma left by an empty side.
resolutions91 <- resolutions91 %>%
  mutate(
    author_duplicate_fin = paste0(author_duplicate, ", ", author_duplicate2) %>%
      trimws() %>%
      str_remove_all(",$") %>%
      str_remove_all("^,") %>%
      trimws()
  )

#Check they exist in mp names (kept for manual inspection)
dups_check <- resolutions91$author_duplicate_fin
dups_check <- dups_check %>% str_split(pattern = ",") %>% unlist %>% trimws %>% unique

# Bring "First Last" citations into the "Last First" order used by mps_91_final.
resolutions91$author_duplicate_fin <- resolutions91$author_duplicate_fin %>%
  str_replace_all("Attilio Bignasca", "Bignasca Attilio") %>%
  str_replace_all("Benito Bernasconi", "Bernasconi Benito")

resolutions91 <- resolutions91 %>%
  mutate(author_final = paste0(author_final, ", ", author_duplicate_fin))


#Create ID variable (seq_len() stays empty when there are no rows)
resolutions91$id <- paste0(1991, "-", seq_len(nrow(resolutions91)))

#separate into a row for each author
resolutions91$author_final <- resolutions91$author_final %>%
  trimws() %>%
  str_remove_all(",$") %>%
  str_remove_all("^,")
resolutions91 <- resolutions91 %>% separate_rows(author_final, sep = ",")

resolutions91$author_final <- resolutions91$author_final %>% trimws()

#Remove same author who appears twice for the same resolution
# (the result stays grouped by id and author_final, as before)
resolutions91 <- resolutions91 %>%
  group_by(id, author_final) %>%
  filter(row_number() == 1)

#Remove unnecessary columns
resolutions91 <- resolutions91 %>%
  select(-c(author_unique, author_unique_compound, author_duplicate, author_duplicate2,
            author_duplicate_fin, author_check))
#Merge (joins on the columns the two tables have in common)
resolutions91 <- resolutions91 %>% left_join(mps_91_final)


# Clean 1995 --------------------------------------------------------
# Build one row per (resolution, author) for the resolutions dated 1995-1998
# and attach the 1995 roster entry of each author.
resolutions95 <- resolutions %>% filter(year >= 1995, year <= 1998)
##List of unique mp names
# MPs of the 1995 roster whose surname occurs once; dashes become spaces.
mps_95_unique <- mp_names %>%
  filter(Anno == 1995) %>%
  group_by(Cognome) %>%
  mutate(n = n()) %>%
  filter(n == 1) %>%
  ungroup() %>%
  select(-n)
mps_95_unique$Cognome <- mps_95_unique$Cognome %>% str_replace_all("-", " ")

#Find people who are kind of duplicates (compound name that includes component of single name)
#Make sure Lepori Colombo comes before Colombo: the surnames are collapsed
#into a regex alternation below, which is tried left to right.
mps_95_unique <- bind_rows(
  mps_95_unique %>% filter(Cognome == "Lepori Colombo"),
  mps_95_unique %>% filter(Cognome == "Colombo"),
  mps_95_unique %>% filter(!Cognome %in% c("Lepori Colombo", "Colombo"))
)


##list of duplicated mp names
mps_95_dups <- mp_names %>%
  filter(Anno == 1995) %>%
  group_by(Cognome) %>%
  mutate(n = n()) %>%
  filter(n > 1) %>%
  ungroup() %>%
  select(-n)
mps_95_dups$Cognome <- mps_95_dups$Cognome %>% str_replace_all("-", " ")

##Final list of MP names
#Create MP names that match the final one: the surname alone when it is
#unique, "Surname Firstname" when it is shared.
mps_95_final <- bind_rows(
  mps_95_unique %>% mutate(author_final = Cognome),
  mps_95_dups %>% mutate(author_final = paste(Cognome, Nome))
)

mps_95_final <- mps_95_final %>% select(-c(first_last, last_first, Anno))

##Extract all unique names
resolutions95$author_unique_compound <- resolutions95$author_unique_compound %>%
  str_replace_all("-", " ")

# paste0() is vectorised, so no rowwise() is needed to combine two columns.
resolutions95 <- resolutions95 %>%
  mutate(author_unique = paste0(author_unique, ", ", author_unique_compound))

# str_extract_all() returns a list; paste() plus the removals flatten it back
# to a ", "-separated string.
resolutions95$author_final <- resolutions95$author_unique %>%
  str_extract_all(paste0(mps_95_unique$Cognome, collapse = "|")) %>%
  paste() %>%
  str_remove_all('\\"') %>%
  str_remove_all("character\\(0\\)") %>%
  str_remove_all("NA") %>%
  str_remove_all("c\\(") %>%
  str_remove_all("\\)")

##Make sure we didn't miss anyone
# Edit distance between what was extracted and what was available, row by
# row. Computed element-wise: adist(x, y) on the whole columns builds an
# n x n matrix only to read its diagonal.
resolutions95 <- resolutions95 %>%
  mutate(author_check = author_unique %>% trimws() %>% str_replace_all(",$", ""))
resolutions95$adist <- vapply(
  seq_len(nrow(resolutions95)),
  function(i) {
    adist(resolutions95$author_final[i], resolutions95$author_check[i])[1, 1]
  },
  numeric(1)
)

#Duplicate names
# Merge both duplicate-surname passes and strip the comma left by an empty side.
resolutions95 <- resolutions95 %>%
  mutate(
    author_duplicate_fin = paste0(author_duplicate, ", ", author_duplicate2) %>%
      trimws() %>%
      str_remove_all(",$") %>%
      str_remove_all("^,") %>%
      trimws()
  )

#Check they exist in mp names (kept for manual inspection)
dups_check <- resolutions95$author_duplicate_fin
dups_check <- dups_check %>% str_split(pattern = ",") %>% unlist %>% trimws %>% unique

# Bring "First Last" citations into the "Last First" order used by mps_95_final.
resolutions95$author_duplicate_fin <- resolutions95$author_duplicate_fin %>%
  str_replace_all("Silvano Bergonzoli", "Bergonzoli Silvano") %>%
  str_replace_all("Massimo Ferrari", "Ferrari Massimo") %>%
  str_replace_all("Mario Ferrari", "Ferrari Mario")

resolutions95 <- resolutions95 %>%
  mutate(author_final = paste0(author_final, ", ", author_duplicate_fin))

#Fixing Foletti
# The untouched author string tells the two Folettis apart: "Foletti G.P." is
# recorded as Giampaolo, "Foletti -" as Michele.
foletti_gp_rows <- which(str_detect(resolutions95$author_original, "Foletti G\\.P\\."))
resolutions95$author_final[foletti_gp_rows] <- paste0(
  resolutions95$author_final[foletti_gp_rows], ", Foletti Giampaolo"
)
foletti_dash_rows <- which(str_detect(resolutions95$author_original, "Foletti -"))
resolutions95$author_final[foletti_dash_rows] <- paste0(
  resolutions95$author_final[foletti_dash_rows], ", Foletti Michele"
)

#Create ID variable (seq_len() stays empty when there are no rows)
resolutions95$id <- paste0(1995, "-", seq_len(nrow(resolutions95)))


#separate into a row for each author
resolutions95$author_final <- resolutions95$author_final %>%
  trimws() %>%
  str_remove_all(",$") %>%
  str_remove_all("^,")
resolutions95 <- resolutions95 %>% separate_rows(author_final, sep = ",")

resolutions95$author_final <- resolutions95$author_final %>% trimws()

#Remove same author who appears twice for the same resolution
# (the result stays grouped by id and author_final, as before)
resolutions95 <- resolutions95 %>%
  group_by(id, author_final) %>%
  filter(row_number() == 1)

#Remove unnecessary columns
resolutions95 <- resolutions95 %>%
  select(-c(author_unique, author_unique_compound, author_duplicate, author_duplicate2,
            author_duplicate_fin, author_check))

#Merge (joins on the columns the two tables have in common)
resolutions95 <- resolutions95 %>% left_join(mps_95_final)

# Clean 1999 --------------------------------------------------------
# Build one row per (resolution, author) for the resolutions dated 1999-2002
# and attach the 1999 roster entry of each author.
resolutions99 <- resolutions %>% filter(year >= 1999, year <= 2002)

##List of unique mp names
# MPs of the 1999 roster whose surname occurs once; dashes become spaces.
mps_99_unique <- mp_names %>%
  filter(Anno == 1999) %>%
  group_by(Cognome) %>%
  mutate(n = n()) %>%
  filter(n == 1) %>%
  ungroup() %>%
  select(-n)
mps_99_unique$Cognome <- mps_99_unique$Cognome %>% str_replace_all("-", " ")

#Make sure Lepori Colombo comes before either Lepori or Colombo!!!
# The surnames are collapsed into a regex alternation below, which is tried
# left to right.
mps_99_unique <- bind_rows(
  mps_99_unique %>% filter(Cognome == "Lepori Colombo"),
  mps_99_unique %>% filter(Cognome == "Lepori"),
  mps_99_unique %>% filter(Cognome == "Colombo"),
  mps_99_unique %>% filter(!Cognome %in% c("Colombo", "Lepori", "Lepori Colombo"))
)

##list of duplicated mp names
mps_99_dups <- mp_names %>%
  filter(Anno == 1999) %>%
  group_by(Cognome) %>%
  mutate(n = n()) %>%
  filter(n > 1) %>%
  ungroup() %>%
  select(-n)
mps_99_dups$Cognome <- mps_99_dups$Cognome %>% str_replace_all("-", " ")

##Final list of MP names
#Create MP names that match the final one. MPs with a shared surname get three
#accepted spellings: "Surname Firstname", "Surname F." and "Surname F".
mps_99_final <- bind_rows(
  mps_99_unique %>% mutate(author_final = Cognome),
  mps_99_dups %>% mutate(author_final = paste(Cognome, Nome)),
  mps_99_dups %>% mutate(author_final = paste0(Cognome, " ", str_sub(Nome, 1, 1), ".")),
  mps_99_dups %>% mutate(author_final = paste0(Cognome, " ", str_sub(Nome, 1, 1)))
)

mps_99_final <- mps_99_final %>% select(-c(first_last, last_first, Anno))
#There are two Ferrari M., so the initial-only spellings are ambiguous
mps_99_final <- mps_99_final %>%
  filter(!author_final %in% c("Ferrari M.", "Ferrari M"))

##Extract all unique names
resolutions99$author_unique_compound <- resolutions99$author_unique_compound %>%
  str_replace_all("-", " ")

# paste0() is vectorised, so no rowwise() is needed to combine two columns.
resolutions99 <- resolutions99 %>%
  mutate(author_unique = paste0(author_unique, ", ", author_unique_compound))

# str_extract_all() returns a list; paste() plus the removals flatten it back
# to a ", "-separated string.
resolutions99$author_final <- resolutions99$author_unique %>%
  str_extract_all(paste0(mps_99_unique$Cognome, collapse = "|")) %>%
  paste() %>%
  str_remove_all('\\"') %>%
  str_remove_all("character\\(0\\)") %>%
  str_remove_all("NA") %>%
  str_remove_all("c\\(") %>%
  str_remove_all("\\)")

##Make sure we didn't miss anyone
# Edit distance between what was extracted and what was available, row by
# row. Computed element-wise: adist(x, y) on the whole columns builds an
# n x n matrix only to read its diagonal.
resolutions99 <- resolutions99 %>%
  mutate(author_check = author_unique %>% trimws() %>% str_replace_all(",$", ""))
resolutions99$adist <- vapply(
  seq_len(nrow(resolutions99)),
  function(i) {
    adist(resolutions99$author_final[i], resolutions99$author_check[i])[1, 1]
  },
  numeric(1)
)

#Duplicate names
# Merge both duplicate-surname passes and strip the comma left by an empty side.
resolutions99 <- resolutions99 %>%
  mutate(
    author_duplicate_fin = paste0(author_duplicate, ", ", author_duplicate2) %>%
      trimws() %>%
      str_remove_all(",$") %>%
      str_remove_all("^,") %>%
      trimws()
  )

#Check they exist in mp names (kept for manual inspection)
dups_check <- resolutions99$author_duplicate_fin
dups_check <- dups_check %>% str_split(pattern = ",") %>% unlist %>% trimws %>% unique

# Bring "First Last" citations into the "Last First" order used by mps_99_final.
resolutions99$author_duplicate_fin <- resolutions99$author_duplicate_fin %>%
  str_replace_all("Marina Carobbio", "Carobbio Marina") %>%
  str_replace_all("Franco Ferrari", "Ferrari Franco")

#Check Again: print the spellings that still have no entry in mps_99_final
dups_check <- resolutions99$author_duplicate_fin
dups_check <- dups_check %>% str_split(pattern = ",") %>% unlist %>% trimws %>% unique
dups_check[which(!dups_check %in% mps_99_final$author_final)]

resolutions99 <- resolutions99 %>%
  mutate(author_final = paste0(author_final, ", ", author_duplicate_fin))

# Citations that read "Maspoli Flavi" not followed by "o" in the untouched
# author string are recorded as Maspoli Flavio.
maspoli_rows <- which(str_detect(resolutions99$author_original, "Maspoli Flavi(?!o)"))
resolutions99$author_final[maspoli_rows] <- paste0(
  resolutions99$author_final[maspoli_rows], ", Maspoli Flavio"
)


#Create ID variable (seq_len() stays empty when there are no rows)
resolutions99$id <- paste0(1999, "-", seq_len(nrow(resolutions99)))

#separate into a row for each author
resolutions99$author_final <- resolutions99$author_final %>%
  trimws() %>%
  str_remove_all(",$") %>%
  str_remove_all("^,")
resolutions99 <- resolutions99 %>% separate_rows(author_final, sep = ",")

resolutions99$author_final <- resolutions99$author_final %>% trimws()

#Remove same author who appears twice for the same resolution
resolutions99 <- resolutions99 %>%
  group_by(id, author_final) %>%
  filter(row_number() == 1)

#Remove unnecessary columns
resolutions99 <- resolutions99 %>%
  select(-c(author_unique, author_unique_compound, author_duplicate, author_duplicate2,
            author_duplicate_fin, author_check))

#Merge (joins on the columns the two tables have in common)
resolutions99 <- resolutions99 %>% left_join(mps_99_final)

# An MP can be matched through several spellings of author_final; keep one
# row per resolution and MP, then drop the grouping.
resolutions99 <- resolutions99 %>%
  group_by(id, Cognome, Nome) %>%
  slice(1) %>%
  ungroup()


# Clean 2003 --------------------------------------------------------
# Resolutions dated 2003 through 2006
resolutions03 <- resolutions %>% filter(year >= 2003 & year <= 2006)

##List of unique mp names
# MPs of the 2003 roster whose surname (Cognome) occurs exactly once
mps_03_unique <- mp_names %>%
  filter(Anno == 2003) %>%
  group_by(Cognome) %>%
  mutate(n = n()) %>%
  filter(n == 1) %>%
  ungroup() %>%
  select(-n)
mps_03_unique$Cognome <- str_replace_all(mps_03_unique$Cognome, "-", " ")

# Put the listed surnames first, in this order, followed by everyone else.
# Cognome is later collapsed, in row order, into a regex alternation;
# presumably compound surnames must be tried before their component surnames.
mps_03_unique <- bind_rows(
  filter(mps_03_unique, Cognome == "Lepori Colombo"),
  filter(mps_03_unique, Cognome == "Carobbio Guscetti"),
  filter(mps_03_unique, Cognome == "Lepori"),
  filter(mps_03_unique, Cognome == "Colombo"),
  filter(mps_03_unique, Cognome == "Carobbio"),
  filter(
    mps_03_unique,
    !Cognome %in% c("Lepori Colombo", "Carobbio Guscetti", "Lepori", "Colombo", "Carobbio")
  )
)

# For the "Beretta Piccoli" rows, use "surname first-name" as Cognome
# (with the quoted nickname "Gerri" removed).
mps_03_unique[which(mps_03_unique$Cognome == "Beretta Piccoli"), ] <-
  mps_03_unique %>%
  filter(str_detect(Cognome, "Beretta Piccoli")) %>%
  mutate(Cognome = str_remove(paste(Cognome, Nome), ' \\"Gerri\\"'))

#Move Beretta Piccolis to author_unique column
resolutions03$author_unique <- local({
  moved <- resolutions03$author_unique
  hit <- which(str_detect(resolutions03$author_duplicate2, "Beretta Piccoli \\w+"))
  # Append the first "Beretta Piccoli <word>" found in author_duplicate2
  moved[hit] <- paste0(
    moved[hit], ", ",
    str_extract(resolutions03$author_duplicate2[hit], "Beretta Piccoli \\w+")
  )
  moved
})

#Remove Beretta Piccolis from author_duplicate2 column
resolutions03$author_duplicate2 <- str_remove_all(
  resolutions03$author_duplicate2,
  "Beretta Piccoli \\w+"
)

#Remove Carobbio Werner
# `Werner` keeps every row whose last_first contains "Carobbio" (both Carobbios);
# only "Carobbio Werner" is then dropped from the unique list.
Werner <- filter(mps_03_unique, str_detect(last_first, "Carobbio"))
mps_03_unique <- filter(mps_03_unique, !str_detect(last_first, "Carobbio Werner"))


##list of duplicated mp names
# MPs of the 2003 roster whose surname occurs more than once, plus the Carobbios
mps_03_dups <- mp_names %>%
  filter(Anno == 2003) %>%
  group_by(Cognome) %>%
  mutate(n = n()) %>%
  filter(n > 1) %>%
  ungroup() %>%
  select(-n)
mps_03_dups$Cognome <- str_replace_all(mps_03_dups$Cognome, "-", " ")

mps_03_dups <- bind_rows(mps_03_dups, Werner)

##Final list of MP names
#Create MP names that match the final one
# Unique surnames match on Cognome alone; duplicated surnames get three
# spellings: "Cognome Nome", "Cognome N." and "Cognome N".
mps_03_final <- bind_rows(
  mutate(mps_03_unique, author_final = Cognome),
  mutate(mps_03_dups, author_final = paste(Cognome, Nome)),
  mutate(mps_03_dups, author_final = paste0(Cognome, " ", str_sub(Nome, 1, 1), ".")),
  mutate(mps_03_dups, author_final = paste0(Cognome, " ", str_sub(Nome, 1, 1)))
)

mps_03_final <- select(mps_03_final, -c(first_last, last_first, Anno))

##Extract all unique names
resolutions03$author_unique_compound <- resolutions03$author_unique_compound %>%
  str_replace_all("-", " ")

resolutions03 <- resolutions03 %>%
  rowwise %>%
  mutate(author_unique = paste0(author_unique, ", ", author_unique_compound))

# str_extract_all() returns one character vector per row. paste() on that list
# deparses each element (e.g. c("A", "B"), character(0), NA); the removals
# below strip the deparse syntax so that only "A, B" (or "") is left.
resolutions03$author_final <- str_extract_all(
  resolutions03$author_unique,
  paste0(mps_03_unique$Cognome, collapse = "|")
) %>%
  paste %>%
  str_remove_all('\\"') %>%
  str_remove_all("character\\(0\\)") %>%
  str_remove_all("NA") %>%
  str_remove_all("c\\(") %>%
  str_remove_all("\\)")

##Make sure we didn't miss anyone
# author_check is author_unique with stray commas removed. The last replacement
# was added after a first pass of this check; the first pass was overwritten
# immediately, so only the final version is computed here.
resolutions03 <- resolutions03 %>%
  mutate(author_check = author_unique %>%
           trimws() %>%
           str_replace_all(",$", "") %>%
           str_replace_all("^, ", "") %>%
           str_replace_all(", ,", ",") %>%
           str_replace_all("Guscetti Marina", "Guscetti"))

# Edit distance between extracted and expected authors, row by row.
# Only the diagonal of adist(x, y) is needed, so compute each pair directly
# instead of building the full n x n distance matrix.
resolutions03$adist <- local({
  found <- resolutions03$author_final
  expected <- resolutions03$author_check
  vapply(
    seq_along(found),
    function(i) adist(found[i], expected[i])[1, 1],
    numeric(1)
  )
})


#Duplicate names
# Join both duplicate-name columns into one comma-separated string per row,
# then strip the whitespace and the comma left at either end.
resolutions03 <- resolutions03 %>%
  rowwise() %>%
  mutate(
    author_duplicate_fin = trimws(
      str_remove_all(
        str_remove_all(
          trimws(paste0(author_duplicate, ", ", author_duplicate2)),
          ",$"
        ),
        "^,"
      )
    )
  )

#Check they exist in mp names
# Distinct names, for manual comparison against the MP list
dups_check <- unique(trimws(unlist(
  str_split(resolutions03$author_duplicate_fin, pattern = ",")
)))

# Rewrite "first-name surname" spellings as they appear in the MP lookup.
# The replacements are applied one after another, in the order listed.
resolutions03$author_duplicate_fin <- str_replace_all(
  resolutions03$author_duplicate_fin,
  c(
    "Werner Carobbio" = "Carobbio Werner",
    "Norman Gobbi" = "Gobbi Norman",
    "Marina Carobbio Guscetti" = "Carobbio Guscetti",
    "Raoul Ghisletta" = "Ghisletta Raoul"
  )
)

#Check again
dups_check <- unique(trimws(unlist(
  str_split(resolutions03$author_duplicate_fin, pattern = ",")
)))

# Append the duplicate-surname authors to the unique-surname authors
resolutions03 <- mutate(
  rowwise(resolutions03),
  author_final = paste0(author_final, ", ", author_duplicate_fin)
)

#Check we caught Carobbio correctly
#Fix Ferrari
# Where the original author string contains "Ferrari C. - Ferrari" but only one
# Ferrari was captured, add "Ferrari M.".
resolutions03$author_final <- local({
  authors <- resolutions03$author_final
  lone_ferrari <- which(
    str_detect(resolutions03$author_original, "Ferrari C\\. - Ferrari") &
      str_count(authors, "Ferrari") == 1
  )
  authors[lone_ferrari] <- paste0(authors[lone_ferrari], ", Ferrari M.")
  authors
})


#Create ID variable
# seq_len() rather than 1:nrow(): on an empty table 1:0 counts down (1, 0)
# and would produce two ids for zero rows.
resolutions03$id <- paste0(2003, "-", seq_len(nrow(resolutions03)))

#Separate into a row for each author
# Trim whitespace and drop a leading/trailing comma first, so the split on ","
# does not start or end with an empty author.
resolutions03$author_final <- resolutions03$author_final %>%
  trimws() %>%
  str_remove_all(",$") %>%
  str_remove_all("^,")
resolutions03 <- resolutions03 %>% separate_rows(author_final, sep = ",")

resolutions03$author_final <- resolutions03$author_final %>% trimws()

#Remove same author who appears twice for the same resolution
resolutions03 <- resolutions03 %>%
  group_by(id, author_final) %>%
  filter(row_number() == 1)

#Remove unnecessary columns
resolutions03 <- resolutions03 %>%
  select(-c(author_unique, author_unique_compound, author_duplicate,
            author_duplicate2, author_duplicate_fin, author_check))


#Merge
# No `by` is supplied, so the join key is every column name the two tables share.
resolutions03 <- resolutions03 %>% left_join(mps_03_final)

# Keep a single row per resolution and MP (first row of each id/Cognome/Nome group).
# NOTE(review): if Cognome/Nome come only from the MP table, authors without a
# match all have NA there, so several unmatched authors of one resolution
# collapse into one row - confirm this is intended.
resolutions03 <- resolutions03 %>%
  group_by(id, Cognome, Nome) %>%
  slice(1) %>%
  ungroup()

# Clean 2007 --------------------------------------------------------
# Resolutions dated 2007 through 2010
resolutions07 <- resolutions %>% filter(year >= 2007 & year <= 2010)

##List of unique mp names
# MPs of the 2007 roster whose surname (Cognome) occurs exactly once
mps_07_unique <- mp_names %>%
  filter(Anno == 2007) %>%
  group_by(Cognome) %>%
  mutate(n = n()) %>%
  filter(n == 1) %>%
  ungroup() %>%
  select(-n)
mps_07_unique$Cognome <- str_replace_all(mps_07_unique$Cognome, "-", " ")

##list of duplicated mp names
# MPs of the 2007 roster whose surname occurs more than once
mps_07_dups <- mp_names %>%
  filter(Anno == 2007) %>%
  group_by(Cognome) %>%
  mutate(n = n()) %>%
  filter(n > 1) %>%
  ungroup() %>%
  select(-n)
mps_07_dups$Cognome <- str_replace_all(mps_07_dups$Cognome, "-", " ")

##Final list of MP names
#Create MP names that match the final one
# Unique surnames match on Cognome alone; duplicated surnames get three
# spellings: "Cognome Nome", "Cognome N." and "Cognome N".
mps_07_final <- bind_rows(
  mutate(mps_07_unique, author_final = Cognome),
  mutate(mps_07_dups, author_final = paste(Cognome, Nome)),
  mutate(mps_07_dups, author_final = paste0(Cognome, " ", str_sub(Nome, 1, 1), ".")),
  mutate(mps_07_dups, author_final = paste0(Cognome, " ", str_sub(Nome, 1, 1)))
)

mps_07_final <- select(mps_07_final, -c(first_last, last_first, Anno))

##Extract all unique names
resolutions07$author_unique_compound <- resolutions07$author_unique_compound %>%
  str_replace_all("-", " ")

resolutions07 <- resolutions07 %>%
  rowwise %>%
  mutate(author_unique = paste0(author_unique, ", ", author_unique_compound))

# str_extract_all() returns one character vector per row. paste() on that list
# deparses each element (e.g. c("A", "B"), character(0), NA); the removals
# below strip the deparse syntax so that only "A, B" (or "") is left.
resolutions07$author_final <- str_extract_all(
  resolutions07$author_unique,
  paste0(mps_07_unique$Cognome, collapse = "|")
) %>%
  paste %>%
  str_remove_all('\\"') %>%
  str_remove_all("character\\(0\\)") %>%
  str_remove_all("NA") %>%
  str_remove_all("c\\(") %>%
  str_remove_all("\\)")

##Make sure we didn't miss anyone
# author_check is author_unique with stray commas removed.
resolutions07 <- resolutions07 %>%
  mutate(author_check = author_unique %>%
           trimws() %>%
           str_replace_all(",$", "") %>%
           str_replace_all("^, ", "") %>%
           str_replace_all(", ,", ","))

# Edit distance between extracted and expected authors, row by row.
# Only the diagonal of adist(x, y) is needed, so compute each pair directly
# instead of building the full n x n distance matrix.
resolutions07$adist <- local({
  found <- resolutions07$author_final
  expected <- resolutions07$author_check
  vapply(
    seq_along(found),
    function(i) adist(found[i], expected[i])[1, 1],
    numeric(1)
  )
})

#Duplicate names
# Join both duplicate-name columns into one comma-separated string per row,
# then strip the whitespace and the comma left at either end.
resolutions07 <- resolutions07 %>%
  rowwise() %>%
  mutate(
    author_duplicate_fin = trimws(
      str_remove_all(
        str_remove_all(
          trimws(paste0(author_duplicate, ", ", author_duplicate2)),
          ",$"
        ),
        "^,"
      )
    )
  )

#Check they exist in mp names
# Distinct names, for manual comparison against the MP list
dups_check <- unique(trimws(unlist(
  str_split(resolutions07$author_duplicate_fin, pattern = ",")
)))


# Rewrite "first-name surname" spellings as "surname first-name".
# The replacements are applied one after another, in the order listed.
resolutions07$author_duplicate_fin <- str_replace_all(
  resolutions07$author_duplicate_fin,
  c(
    "Boris Bignasca" = "Bignasca Boris",
    "Norman Gobbi" = "Gobbi Norman",
    "Attilio Bignasca" = "Bignasca Attilio",
    "Raoul Ghisletta" = "Ghisletta Raoul"
  )
)

#Check again
dups_check <- unique(trimws(unlist(
  str_split(resolutions07$author_duplicate_fin, pattern = ",")
)))

# Append the duplicate-surname authors to the unique-surname authors
resolutions07 <- mutate(
  rowwise(resolutions07),
  author_final = paste0(author_final, ", ", author_duplicate_fin)
)


# Manual fixes: for every row whose original author string matches the pattern
# on the left, append the name on the right. Applied in the order listed.
# NOTE(review): the "." in "A.Bignasca", "B.Bignasca" and "Ghisletta D." is an
# unescaped regex wildcard (other sections escape it as "\\.") - confirm
# whether a literal dot was meant.
resolutions07$author_final <- local({
  authors <- resolutions07$author_final
  manual_fixes <- c(
    #Fix Bignasca
    "A.Bignasca" = ", Bignasca Attilio",
    "B.Bignasca" = ", Bignasca Boris",
    "MBignasca" = ", Bignasca Mirto",
    #Fix Ghisletta
    "Ghisletta D." = ", Ghisletta Dario",
    "GhislettaR" = ", Ghisletta Raoul",
    #Fix Gobbi
    "Gobbi Rinal" = ", Gobbi Rinaldo",
    "Norman Gobbi" = ", Gobbi Norman"
  )
  for (pattern in names(manual_fixes)) {
    hits <- which(str_detect(resolutions07$author_original, pattern))
    authors[hits] <- paste0(authors[hits], manual_fixes[[pattern]])
  }
  authors
})

#Create ID variable
# seq_len() rather than 1:nrow(): on an empty table 1:0 counts down (1, 0)
# and would produce two ids for zero rows.
resolutions07$id <- paste0(2007, "-", seq_len(nrow(resolutions07)))

#Separate into a row for each author
# Trim whitespace and drop a leading/trailing comma first, so the split on ","
# does not start or end with an empty author.
resolutions07$author_final <- resolutions07$author_final %>%
  trimws() %>%
  str_remove_all(",$") %>%
  str_remove_all("^,")
resolutions07 <- resolutions07 %>% separate_rows(author_final, sep = ",")

resolutions07$author_final <- resolutions07$author_final %>% trimws()

#Remove same author who appears twice for the same resolution
resolutions07 <- resolutions07 %>%
  group_by(id, author_final) %>%
  filter(row_number() == 1)

#Remove unnecessary columns
resolutions07 <- resolutions07 %>%
  select(-c(author_unique, author_unique_compound, author_duplicate,
            author_duplicate2, author_duplicate_fin, author_check))


#Merge
# No `by` is supplied, so the join key is every column name the two tables share.
resolutions07 <- resolutions07 %>% left_join(mps_07_final)


# Keep a single row per resolution and MP (first row of each id/Cognome/Nome group).
# NOTE(review): if Cognome/Nome come only from the MP table, authors without a
# match all have NA there, so several unmatched authors of one resolution
# collapse into one row - confirm this is intended.
resolutions07 <- resolutions07 %>%
  group_by(id, Cognome, Nome) %>%
  slice(1) %>%
  ungroup()

# Clean 2011 --------------------------------------------------------
# Resolutions dated 2011 through 2014
resolutions11 <- resolutions %>% filter(year >= 2011 & year <= 2014)

##List of unique mp names
# MPs of the 2011 roster whose surname (Cognome) occurs exactly once
mps_11_unique <- mp_names %>%
  filter(Anno == 2011) %>%
  group_by(Cognome) %>%
  mutate(n = n()) %>%
  filter(n == 1) %>%
  ungroup() %>%
  select(-n)
mps_11_unique$Cognome <- str_replace_all(mps_11_unique$Cognome, "-", " ")

# Give the two Beretta Piccolis a Cognome that includes the first name.
# The match is on last_first, where the second spelling is hyphenated.
mps_11_unique$Cognome[
  which(str_detect(mps_11_unique$last_first, "Beretta Piccoli Luca"))
] <- "Beretta Piccoli Luca"
mps_11_unique$Cognome[
  which(str_detect(mps_11_unique$last_first, "Beretta-Piccoli Fausto"))
] <- "Beretta Piccoli Fausto"

#Remove from author unique
# Drop the bare ", Beretta Piccoli", then append the full name wherever the
# author string matches one of the patterns (applied in the order listed).
resolutions11$author_unique <- local({
  unique_names <- str_remove_all(resolutions11$author_unique, ", Beretta Piccoli")
  full_names <- c(
    "Piccoli F\\." = ", Beretta Piccoli Fausto",
    "Piccoli Fausto" = ", Beretta Piccoli Fausto",
    "Piccoli L\\." = ", Beretta Piccoli Luca",
    "Piccoli Luca" = ", Beretta Piccoli Luca"
  )
  for (pattern in names(full_names)) {
    hits <- which(str_detect(resolutions11$author, pattern))
    unique_names[hits] <- paste0(unique_names[hits], full_names[[pattern]])
  }
  unique_names
})


##list of duplicated mp names
# MPs of the 2011 roster whose surname occurs more than once
mps_11_dups <- mp_names %>%
  filter(Anno == 2011) %>%
  group_by(Cognome) %>%
  mutate(n = n()) %>%
  filter(n > 1) %>%
  ungroup() %>%
  select(-n)
mps_11_dups$Cognome <- str_replace_all(mps_11_dups$Cognome, "-", " ")


##Final list of MP names
#Create MP names that match the final one
# Unique surnames match on Cognome alone; duplicated surnames get three
# spellings: "Cognome Nome", "Cognome N." and "Cognome N".
mps_11_final <- bind_rows(
  mutate(mps_11_unique, author_final = Cognome),
  mutate(mps_11_dups, author_final = paste(Cognome, Nome)),
  mutate(mps_11_dups, author_final = paste0(Cognome, " ", str_sub(Nome, 1, 1), ".")),
  mutate(mps_11_dups, author_final = paste0(Cognome, " ", str_sub(Nome, 1, 1)))
)

mps_11_final <- select(mps_11_final, -c(first_last, last_first, Anno))

##Extract all unique names
resolutions11$author_unique_compound <- resolutions11$author_unique_compound %>%
  str_replace_all("-", " ")

resolutions11 <- resolutions11 %>%
  rowwise %>%
  mutate(author_unique = paste0(author_unique, ", ", author_unique_compound))

# str_extract_all() returns one character vector per row. paste() on that list
# deparses each element (e.g. c("A", "B"), character(0), NA); the removals
# below strip the deparse syntax so that only "A, B" (or "") is left.
resolutions11$author_final <- str_extract_all(
  resolutions11$author_unique,
  paste0(mps_11_unique$Cognome, collapse = "|")
) %>%
  paste %>%
  str_remove_all('\\"') %>%
  str_remove_all("character\\(0\\)") %>%
  str_remove_all("NA") %>%
  str_remove_all("c\\(") %>%
  str_remove_all("\\)")

##Make sure we didn't miss anyone
# author_check is author_unique with stray commas removed. The last three
# replacements were added after a first pass of this check; the first pass was
# overwritten immediately, so only the final version is computed here.
resolutions11 <- resolutions11 %>%
  mutate(author_check = author_unique %>%
           trimws() %>%
           str_replace_all(",$", "") %>%
           str_replace_all("^, ", "") %>%
           str_replace_all(", ,", ",") %>%
           str_replace_all("Crivelli Barella Claudia", "Crivelli Barella") %>%
           str_replace_all("Beretta Piccoli,", "") %>%
           str_replace_all("Bacchetta Cattori Fabio", "Bacchetta Cattori"))

# Edit distance between extracted and expected authors, row by row.
# Only the diagonal of adist(x, y) is needed, so compute each pair directly
# instead of building the full n x n distance matrix.
resolutions11$adist <- local({
  found <- resolutions11$author_final
  expected <- resolutions11$author_check
  vapply(
    seq_along(found),
    function(i) adist(found[i], expected[i])[1, 1],
    numeric(1)
  )
})

#Duplicate names
# Join both duplicate-name columns into one comma-separated string per row,
# then strip the whitespace and the comma left at either end.
resolutions11 <- resolutions11 %>%
  rowwise() %>%
  mutate(
    author_duplicate_fin = trimws(
      str_remove_all(
        str_remove_all(
          trimws(paste0(author_duplicate, ", ", author_duplicate2)),
          ",$"
        ),
        "^,"
      )
    )
  )

#Check they exist in mp names
# Distinct names, for manual comparison against the MP list
dups_check <- unique(trimws(unlist(
  str_split(resolutions11$author_duplicate_fin, pattern = ",")
)))

# Rewrite "first-name surname" spellings as "surname first-name".
# The replacements are applied one after another, in the order listed.
resolutions11$author_duplicate_fin <- str_replace_all(
  resolutions11$author_duplicate_fin,
  c(
    "Saverio Lurati" = "Lurati Saverio",
    "Mirto Bignasca" = "Bignasca Mirto",
    "Attilio Bignasca" = "Bignasca Attilio"
  )
)

#Check again
dups_check <- unique(trimws(unlist(
  str_split(resolutions11$author_duplicate_fin, pattern = ",")
)))

# Append the duplicate-surname authors to the unique-surname authors
resolutions11 <- mutate(
  rowwise(resolutions11),
  author_final = paste0(author_final, ", ", author_duplicate_fin)
)

#Create ID variable
# seq_len() rather than 1:nrow(): on an empty table 1:0 counts down (1, 0)
# and would produce two ids for zero rows.
resolutions11$id <- paste0(2011, "-", seq_len(nrow(resolutions11)))

#Separate into a row for each author
# Trim whitespace and drop a leading/trailing comma first, so the split on ","
# does not start or end with an empty author.
resolutions11$author_final <- resolutions11$author_final %>%
  trimws() %>%
  str_remove_all(",$") %>%
  str_remove_all("^,")
resolutions11 <- resolutions11 %>% separate_rows(author_final, sep = ",")

resolutions11$author_final <- resolutions11$author_final %>% trimws()

#Remove same author who appears twice for the same resolution
resolutions11 <- resolutions11 %>%
  group_by(id, author_final) %>%
  filter(row_number() == 1)

#Remove unnecessary columns
resolutions11 <- resolutions11 %>%
  select(-c(author_unique, author_unique_compound, author_duplicate,
            author_duplicate2, author_duplicate_fin, author_check))


#Merge
# No `by` is supplied, so the join key is every column name the two tables share.
resolutions11 <- resolutions11 %>% left_join(mps_11_final)

# Keep a single row per resolution and MP (first row of each id/Cognome/Nome group).
# NOTE(review): if Cognome/Nome come only from the MP table, authors without a
# match all have NA there, so several unmatched authors of one resolution
# collapse into one row - confirm this is intended.
resolutions11 <- resolutions11 %>%
  group_by(id, Cognome, Nome) %>%
  slice(1) %>%
  ungroup()

# Clean 2015 --------------------------------------------------------
# Resolutions dated 2015 through 2018
resolutions15 <- resolutions %>% filter(year >= 2015 & year <= 2018)

##List of unique mp names
# MPs of the 2015 roster whose surname (Cognome) occurs exactly once
mps_15_unique <- mp_names %>%
  filter(Anno == 2015) %>%
  group_by(Cognome) %>%
  mutate(n = n()) %>%
  filter(n == 1) %>%
  ungroup() %>%
  select(-n)
mps_15_unique$Cognome <- str_replace_all(mps_15_unique$Cognome, "-", " ")


##list of duplicated mp names
# MPs of the 2015 roster whose surname occurs more than once
mps_15_dups <- mp_names %>%
  filter(Anno == 2015) %>%
  group_by(Cognome) %>%
  mutate(n = n()) %>%
  filter(n > 1) %>%
  ungroup() %>%
  select(-n)
mps_15_dups$Cognome <- str_replace_all(mps_15_dups$Cognome, "-", " ")

##Final list of MP names
#Create MP names that match the final one
# Unique surnames match on Cognome alone; duplicated surnames get three
# spellings: "Cognome Nome", "Cognome N." and "Cognome N".
mps_15_final <- bind_rows(
  mutate(mps_15_unique, author_final = Cognome),
  mutate(mps_15_dups, author_final = paste(Cognome, Nome)),
  mutate(mps_15_dups, author_final = paste0(Cognome, " ", str_sub(Nome, 1, 1), ".")),
  mutate(mps_15_dups, author_final = paste0(Cognome, " ", str_sub(Nome, 1, 1)))
)

mps_15_final <- select(mps_15_final, -c(first_last, last_first, Anno))

##Extract all unique names
resolutions15$author_unique_compound <- resolutions15$author_unique_compound %>%
  str_replace_all("-", " ")

resolutions15 <- resolutions15 %>%
  rowwise %>%
  mutate(author_unique = paste0(author_unique, ", ", author_unique_compound))

# str_extract_all() returns one character vector per row. paste() on that list
# deparses each element (e.g. c("A", "B"), character(0), NA); the removals
# below strip the deparse syntax so that only "A, B" (or "") is left.
resolutions15$author_final <- str_extract_all(
  resolutions15$author_unique,
  paste0(mps_15_unique$Cognome, collapse = "|")
) %>%
  paste %>%
  str_remove_all('\\"') %>%
  str_remove_all("character\\(0\\)") %>%
  str_remove_all("NA") %>%
  str_remove_all("c\\(") %>%
  str_remove_all("\\)")

##Make sure we didn't miss anyone
# author_check is author_unique with stray commas removed. The last three
# replacements were added after a first pass of this check; the first pass was
# overwritten immediately, so only the final version is computed here.
# (The "Delcò Petralli Michela" replacement used to be listed twice.)
resolutions15 <- resolutions15 %>%
  mutate(author_check = author_unique %>%
           trimws() %>%
           str_replace_all(",$", "") %>%
           str_replace_all("^, ", "") %>%
           str_replace_all(", ,", ",") %>%
           str_replace_all("Ferrara Micocci Natalia", "Ferrara Micocci") %>%
           str_replace_all("Delcò Petralli Michela", "Delcò Petralli") %>%
           str_replace_all("Lurati Grassi Tatiana", "Lurati Grassi"))

# Edit distance between extracted and expected authors, row by row.
# Only the diagonal of adist(x, y) is needed, so compute each pair directly
# instead of building the full n x n distance matrix.
resolutions15$adist <- local({
  found <- resolutions15$author_final
  expected <- resolutions15$author_check
  vapply(
    seq_along(found),
    function(i) adist(found[i], expected[i])[1, 1],
    numeric(1)
  )
})
gc()


#Duplicate names
# Join both duplicate-name columns into one comma-separated string per row,
# then strip the whitespace and the comma left at either end.
resolutions15 <- resolutions15 %>%
  rowwise() %>%
  mutate(
    author_duplicate_fin = trimws(
      str_remove_all(
        str_remove_all(
          trimws(paste0(author_duplicate, ", ", author_duplicate2)),
          ",$"
        ),
        "^,"
      )
    )
  )

#Check they exist in mp names
# Distinct names, for manual comparison against the MP list
dups_check <- unique(trimws(unlist(
  str_split(resolutions15$author_duplicate_fin, pattern = ",")
)))



#Check again
# No spellings are rewritten for this period, so this repeats the check above.
dups_check <- unique(trimws(unlist(
  str_split(resolutions15$author_duplicate_fin, pattern = ",")
)))

# Append the duplicate-surname authors to the unique-surname authors
resolutions15 <- mutate(
  rowwise(resolutions15),
  author_final = paste0(author_final, ", ", author_duplicate_fin)
)

#Create ID variable
# seq_len() rather than 1:nrow(): on an empty table 1:0 counts down (1, 0)
# and would produce two ids for zero rows.
resolutions15$id <- paste0(2015, "-", seq_len(nrow(resolutions15)))

#Separate into a row for each author
# Trim whitespace and drop a leading/trailing comma first, so the split on ","
# does not start or end with an empty author.
resolutions15$author_final <- resolutions15$author_final %>%
  trimws() %>%
  str_remove_all(",$") %>%
  str_remove_all("^,")
resolutions15 <- resolutions15 %>% separate_rows(author_final, sep = ",")

resolutions15$author_final <- resolutions15$author_final %>% trimws()

#Remove same author who appears twice for the same resolution
resolutions15 <- resolutions15 %>%
  group_by(id, author_final) %>%
  filter(row_number() == 1)

#Remove unnecessary columns
resolutions15 <- resolutions15 %>%
  select(-c(author_unique, author_unique_compound, author_duplicate,
            author_duplicate2, author_duplicate_fin, author_check))


#Merge
# No `by` is supplied, so the join key is every column name the two tables share.
resolutions15 <- resolutions15 %>% left_join(mps_15_final)

# Keep a single row per resolution and MP (first row of each id/Cognome/Nome group).
# NOTE(review): if Cognome/Nome come only from the MP table, authors without a
# match all have NA there, so several unmatched authors of one resolution
# collapse into one row - confirm this is intended.
resolutions15 <- resolutions15 %>%
  group_by(id, Cognome, Nome) %>%
  slice(1) %>%
  ungroup()

# Clean 2019 --------------------------------------------------------
# Resolutions dated 2019 or later
resolutions19 <- filter(resolutions, year >= 2019)

##List of unique mp names
# MPs of the 2019 roster whose surname (Cognome) occurs exactly once
mps_19_unique <- mp_names %>%
  filter(Anno == 2019) %>%
  group_by(Cognome) %>%
  mutate(n = n()) %>%
  filter(n == 1) %>%
  ungroup() %>%
  select(-n)
mps_19_unique$Cognome <- str_replace_all(mps_19_unique$Cognome, "-", " ")

# Put the listed surnames first, in this order, followed by everyone else.
# Cognome is later collapsed, in row order, into a regex alternation;
# presumably compound surnames must be tried before plain "Lepori".
mps_19_unique <- bind_rows(
  filter(mps_19_unique, Cognome == "Lepori Sergi"),
  filter(mps_19_unique, Cognome == "Ermotti Lepori"),
  filter(mps_19_unique, Cognome == "Lepori"),
  filter(mps_19_unique, !Cognome %in% c("Lepori Sergi", "Ermotti Lepori", "Lepori"))
)

##list of duplicated mp names
# MPs of the 2019 roster whose surname occurs more than once
mps_19_dups <- mp_names %>%
  filter(Anno == 2019) %>%
  group_by(Cognome) %>%
  mutate(n = n()) %>%
  filter(n > 1) %>%
  ungroup() %>%
  select(-n)
mps_19_dups$Cognome <- str_replace_all(mps_19_dups$Cognome, "-", " ")

##Final list of MP names
#Create MP names that match the final one
# Unique surnames match on Cognome alone; duplicated surnames get three
# spellings: "Cognome Nome", "Cognome N." and "Cognome N".
mps_19_final <- bind_rows(
  mutate(mps_19_unique, author_final = Cognome),
  mutate(mps_19_dups, author_final = paste(Cognome, Nome)),
  mutate(mps_19_dups, author_final = paste0(Cognome, " ", str_sub(Nome, 1, 1), ".")),
  mutate(mps_19_dups, author_final = paste0(Cognome, " ", str_sub(Nome, 1, 1)))
)

mps_19_final <- select(mps_19_final, -c(first_last, last_first, Anno))

#Remove the following because the Gianellas have the same first name initial
# Drops the initial-only spellings ("Gianella A." / "Gianella A").
mps_19_final <- filter(
  mps_19_final,
  !str_detect(author_final, "Gianella A\\."),
  !str_detect(author_final, "Gianella A$")
)

##Extract all unique names
resolutions19$author_unique_compound <- resolutions19$author_unique_compound %>%
  str_replace_all("-", " ")

resolutions19 <- resolutions19 %>%
  rowwise %>%
  mutate(author_unique = paste0(author_unique, ", ", author_unique_compound))

# str_extract_all() returns one character vector per row. paste() on that list
# deparses each element (e.g. c("A", "B"), character(0), NA); the removals
# below strip the deparse syntax so that only "A, B" (or "") is left.
resolutions19$author_final <- str_extract_all(
  resolutions19$author_unique,
  paste0(mps_19_unique$Cognome, collapse = "|")
) %>%
  paste %>%
  str_remove_all('\\"') %>%
  str_remove_all("character\\(0\\)") %>%
  str_remove_all("NA") %>%
  str_remove_all("c\\(") %>%
  str_remove_all("\\)")

##Make sure we didn't miss anyone
# author_check is author_unique with stray commas removed. The last two
# replacements were added after a first pass of this check; the first pass was
# overwritten immediately, so only the final version is computed here.
resolutions19 <- resolutions19 %>%
  mutate(author_check = author_unique %>%
           trimws() %>%
           str_replace_all(",$", "") %>%
           str_replace_all("^, ", "") %>%
           str_replace_all(", ,", ",") %>%
           str_replace_all("Lurati Grassi Tatiana", "Lurati Grassi") %>%
           str_replace_all("Crivelli Barella Claudia", "Crivelli Barella"))

# Edit distance between extracted and expected authors, row by row.
# Only the diagonal of adist(x, y) is needed, so compute each pair directly
# instead of building the full n x n distance matrix.
resolutions19$adist <- local({
  found <- resolutions19$author_final
  expected <- resolutions19$author_check
  vapply(
    seq_along(found),
    function(i) adist(found[i], expected[i])[1, 1],
    numeric(1)
  )
})

gc()

#Duplicate names
# Join both duplicate-name columns into one comma-separated string per row,
# then strip the whitespace and the comma left at either end.
resolutions19 <- resolutions19 %>%
  rowwise() %>%
  mutate(
    author_duplicate_fin = trimws(
      str_remove_all(
        str_remove_all(
          trimws(paste0(author_duplicate, ", ", author_duplicate2)),
          ",$"
        ),
        "^,"
      )
    )
  )

#Check they exist in mp names
# Distinct names, for manual comparison against the MP list
dups_check <- unique(trimws(unlist(
  str_split(resolutions19$author_duplicate_fin, pattern = ",")
)))

# Rewrite "first-name surname" spellings as "surname first-name".
# The replacements are applied one after another, in the order listed.
resolutions19$author_duplicate_fin <- str_replace_all(
  resolutions19$author_duplicate_fin,
  c(
    "Boris Bignasca" = "Bignasca Boris",
    "Alessandra Gianella" = "Gianella Alessandra"
  )
)

#Check again
dups_check <- unique(trimws(unlist(
  str_split(resolutions19$author_duplicate_fin, pattern = ",")
)))

# Append the duplicate-surname authors to the unique-surname authors
resolutions19 <- mutate(
  rowwise(resolutions19),
  author_final = paste0(author_final, ", ", author_duplicate_fin)
)

#This doesn't really matter but remove Bignasca Attilio when he's not mentioned but Boris is mentioned (both come from same region so it shouldn't really matter)
resolutions19$author_final <- local({
  authors <- resolutions19$author_final
  original <- resolutions19$author_original
  # Rows where Attilio was captured although the original string names only Boris
  boris_only <- which(
    str_detect(authors, "Bignasca Attilio") &
      str_detect(original, "Bignasca Boris|Boris Bignasca") &
      !str_detect(original, "Bignasca Attilio")
  )
  authors[boris_only] <- str_remove_all(authors[boris_only], ", Bignasca Attilio")
  authors
})

#Create ID variable
# seq_len() rather than 1:nrow(): on an empty table 1:0 counts down (1, 0)
# and would produce two ids for zero rows.
resolutions19$id <- paste0(2019, "-", seq_len(nrow(resolutions19)))

#Separate into a row for each author
# Trim whitespace and drop a leading/trailing comma first, so the split on ","
# does not start or end with an empty author.
resolutions19$author_final <- resolutions19$author_final %>%
  trimws() %>%
  str_remove_all(",$") %>%
  str_remove_all("^,")
resolutions19 <- resolutions19 %>% separate_rows(author_final, sep = ",")

resolutions19$author_final <- resolutions19$author_final %>% trimws()


#Remove same author who appears twice for the same resolution
resolutions19 <- resolutions19 %>%
  group_by(id, author_final) %>%
  filter(row_number() == 1)

#Remove unnecessary columns
resolutions19 <- resolutions19 %>%
  select(-c(author_unique, author_unique_compound, author_duplicate,
            author_duplicate2, author_duplicate_fin, author_check))


#Merge
# No `by` is supplied, so the join key is every column name the two tables share.
resolutions19 <- resolutions19 %>% left_join(mps_19_final)

# Keep a single row per resolution and MP (first row of each id/Cognome/Nome group).
# NOTE(review): if Cognome/Nome come only from the MP table, authors without a
# match all have NA there, so several unmatched authors of one resolution
# collapse into one row - confirm this is intended.
resolutions19 <- resolutions19 %>%
  group_by(id, Cognome, Nome) %>%
  slice(1) %>%
  ungroup()

# Bind everything ---------------------------------------------------------
# Stack the per-period tables, oldest first, into one table.
resolutions_final <- bind_rows(
  resolutions87,
  resolutions91,
  resolutions95,
  resolutions99,
  resolutions03,
  resolutions07,
  resolutions11,
  resolutions15,
  resolutions19
)

# Namespace-qualified so the save does not depend on library(qs) being attached
# (same convention as the readxl::read_excel() call that loads the MP names).
qs::qsave(resolutions_final, here("data", "ticino_parliament", "ticino_clean.qs"))
